diff --git a/.gitignore b/.gitignore index 02d1512ac..0258e4e2e 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ /tests/helpers/*pyc # Extra files +/src/pg_crc.c /src/datapagemap.c /src/datapagemap.h /src/logging.h @@ -43,3 +44,18 @@ /src/xlogreader.c /src/walmethods.c /src/walmethods.h +/src/instr_time.h + +# Doc files +/doc/*html + +# Docker files +/docker-compose.yml +/Dockerfile +/Dockerfile.in +/run_tests.sh +/make_dockerfile.sh +/backup_restore.sh + +# Misc +.python-version diff --git a/.travis.yml b/.travis.yml index 35b49ec5b..fc7ecc059 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,47 @@ -sudo: required +os: linux + +dist: bionic + +language: c services: -- docker + - docker + +before_install: + - cp travis/* . + +install: + - ./make_dockerfile.sh + - docker-compose build script: -- docker run -v $(pwd):/tests --rm centos:7 /tests/travis/backup_restore.sh + - docker-compose run tests + # - docker-compose run $(bash <(curl -s https://codecov.io/env)) tests + # - docker run -v $(pwd):/tests --rm centos:7 /tests/travis/backup_restore.sh + +notifications: + email: + on_success: change + on_failure: always + +# Default MODE is basic, i.e. all tests with PG_PROBACKUP_TEST_BASIC=ON +env: + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=archive + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=backup + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=compression + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=delta + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=locking + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=merge + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=page + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=replica + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=retention + - PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=restore + - PG_VERSION=11 PG_BRANCH=REL_11_STABLE + - PG_VERSION=10 PG_BRANCH=REL_10_STABLE + - PG_VERSION=9.6 PG_BRANCH=REL9_6_STABLE + - PG_VERSION=9.5 PG_BRANCH=REL9_5_STABLE + +jobs: + allow_failures: + - if: env(MODE) IN (archive, backup, delta, locking, merge, replica, retention, restore) diff --git a/COPYRIGHT b/COPYRIGHT deleted file mode 100644 index 49d704724..000000000 --- a/COPYRIGHT +++ /dev/null @@ -1,29 +0,0 @@ -Copyright (c) 2015-2017, Postgres Professional -Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - -Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group -Portions Copyright (c) 1994, The Regents of the University of California - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the NIPPON TELEGRAPH AND TELEPHONE CORPORATION - (NTT) nor the names of its contributors may be used to endorse or - promote products derived from this software without specific prior - written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Documentation.md b/Documentation.md new file mode 100644 index 000000000..943ea0cfd --- /dev/null +++ b/Documentation.md @@ -0,0 +1,2096 @@ +# pg_probackup + +pg_probackup is a utility to manage backup and recovery of PostgreSQL database clusters. It is designed to perform periodic backups of the PostgreSQL instance that enable you to restore the server in case of a failure. pg_probackup supports PostgreSQL 9.5 or higher. + +Current version - 2.2.5 + +1. [Synopsis](#synopsis) +2. [Versioning](#versioning) +3. [Overview](#overview) + * [Limitations](#limitations) + +4. [Installation and Setup](#installation-and-setup) + * [Initializing the Backup Catalog](#initializing-the-backup-catalog) + * [Adding a New Backup Instance](#adding-a-new-backup-instance) + * [Configuring the Database Cluster](#configuring-the-database-cluster) + * [Setting up STREAM Backups](#setting-up-stream-backups) + * [Setting up Continuous WAL Archiving](#setting-up-continuous-wal-archiving) + * [Setting up Backup from Standby](#setting-up-backup-from-standby) + * [Setting up Cluster Verification](#setting-up-cluster-verification) + * [Setting up Partial Restore](#setting-up-partial-restore) + * [Configuring the Remote Mode](#configuring-the-remote-mode) + * [Setting up PTRACK Backups](#setting-up-ptrack-backups) + +5. [Usage](#usage) + * [Creating a Backup](#creating-a-backup) + * [ARCHIVE WAL mode](#archive-mode) + * [STREAM WAL mode](#stream-mode) + * [Page validation](#page-validation) + * [External directories](#external-directories) + * [Verifying a Cluster](#verifying-a-cluster) + * [Validating a Backup](#validating-a-backup) + * [Restoring a Cluster](#restoring-a-cluster) + * [Partial Restore](#partial-restore) + * [Performing Point-in-Time (PITR) Recovery](#performing-point-in-time-pitr-recovery) + * [Using pg_probackup in the Remote Mode](#using-pg_probackup-in-the-remote-mode) + * [Running pg_probackup on Parallel Threads](#running-pg_probackup-on-parallel-threads) + * [Configuring pg_probackup](#configuring-pg_probackup) + * [Managing the Backup Catalog](#managing-the-backup-catalog) + * [Viewing Backup Information](#viewing-backup-information) + * [Viewing WAL Archive Information](#viewing-wal-archive-information) + * [Configuring Retention Policy](#configuring-retention-policy) + * [Backup Retention Policy](#backup-retention-policy) + * [Backup Pinning](#backup-pinning) + * [WAL Archive Retention Policy](#wal-archive-retention-policy) + * [Merging Backups](#merging-backups) + * [Deleting Backups](#deleting-backups) + +6. 
[Command-Line Reference](#command-line-reference) + * [Commands](#commands) + * [version](#version) + * [help](#help) + * [init](#init) + * [add-instance](#add-instance) + * [del-instance](#del-instance) + * [set-config](#set-config) + * [set-backup](#set-backup) + * [show-config](#show-config) + * [show](#show) + * [backup](#backup) + * [restore](#restore) + * [checkdb](#checkdb) + * [validate](#validate) + * [merge](#merge) + * [delete](#delete) + * [archive-push](#archive-push) + * [archive-get](#archive-get) + * [Options](#options) + * [Common Options](#common-options) + * [Recovery Target Options](#recovery-target-options) + * [Retention Options](#retention-options) + * [Pinning Options](#pinning-options) + * [Logging Options](#logging-options) + * [Connection Options](#connection-options) + * [Compression Options](#compression-options) + * [Archiving Options](#archiving-options) + * [Remote Mode Options](#remote-mode-options) + * [Remote WAL Archive Options](#remote-wal-archive-options) + * [Partial Restore Options](#partial-restore-options) + * [Replica Options](#replica-options) + +7. [Howto](#howto) + * [Minimal setup](#minimal-setup) +8. [Authors](#authors) +9. [Credits](#credits) + + +## Synopsis + +`pg_probackup version` + +`pg_probackup help [command]` + +`pg_probackup init -B backup_dir` + +`pg_probackup add-instance -B backup_dir -D data_dir --instance instance_name` + +`pg_probackup del-instance -B backup_dir --instance instance_name` + +`pg_probackup set-config -B backup_dir --instance instance_name [option...]` + +`pg_probackup set-backup -B backup_dir --instance instance_name -i backup_id [option...]` + +`pg_probackup show-config -B backup_dir --instance instance_name [--format=format]` + +`pg_probackup show -B backup_dir [option...]` + +`pg_probackup backup -B backup_dir --instance instance_name -b backup_mode [option...]` + +`pg_probackup restore -B backup_dir --instance instance_name [option...]` + +`pg_probackup checkdb -B backup_dir --instance instance_name [-D data_dir] [option...]` + +`pg_probackup validate -B backup_dir [option...]` + +`pg_probackup merge -B backup_dir --instance instance_name -i backup_id [option...]` + +`pg_probackup delete -B backup_dir --instance instance_name { -i backup_id | --delete-wal | --delete-expired | --merge-expired } [option...]` + +`pg_probackup archive-push -B backup_dir --instance instance_name --wal-file-path=wal_file_path --wal-file-name=wal_file_name [option...]` + +`pg_probackup archive-get -B backup_dir --instance instance_name --wal-file-path=wal_file_path --wal-file-name=wal_file_name [option...]` + + +## Versioning + +pg_probackup is following the [semantic](https://semver.org/) versioning. + +## Overview + +As compared to other backup solutions, pg_probackup offers the following benefits that can help you implement different backup strategies and deal with large amounts of data: + +- Incremental backup: page-level incremental backup allows you to save disk space, speed up backup and restore. With three different incremental modes you can plan the backup strategy in accordance with your data flow +- Validation: automatic data consistency checks and on-demand backup validation without actual data recovery +- Verification: on-demand verification of PostgreSQL instance via dedicated command `checkdb` +- Retention: managing WAL archive and backups in accordance with retention policies - Time and/or Redundancy based, with two retention methods: `delete expired` and `merge expired`. 
Additionally, you can design your own retention policy by setting 'time to live' for backups +- Parallelization: running backup, restore, merge, delete, verification and validation processes on multiple parallel threads +- Compression: storing backup data in a compressed state to save disk space +- Deduplication: saving disk space by not copying unchanged non-data files ('_vm', '_fsm', etc) +- Remote operations: back up a PostgreSQL instance located on a remote machine or restore a backup onto it +- Backup from replica: avoid extra load on the master server by taking backups from a standby +- External directories: include in the backup the content of directories located outside of the PostgreSQL data directory (PGDATA), such as scripts, configs, logs and pg_dump files +- Backup Catalog: get the list of backups and corresponding meta information in `plain` or `json` formats +- Archive Catalog: get the list of all WAL timelines and corresponding meta information in `plain` or `json` formats +- Partial Restore: restore only the specified databases or skip the specified databases. + +To manage backup data, pg_probackup creates a `backup catalog`. This is a directory that stores all backup files with additional meta information, as well as WAL archives required for point-in-time recovery. You can store backups for different instances in separate subdirectories of a single backup catalog. + +Using pg_probackup, you can take full or incremental [backups](#creating-a-backup): + +- FULL backups contain all the data files required to restore the database cluster. +- Incremental backups only store the data that has changed since the previous backup. This allows you to decrease the backup size and speed up backup and restore operations. pg_probackup supports the following modes of incremental backups: + - DELTA backup. In this mode, pg_probackup reads all data files in the data directory and copies only those pages that have changed since the previous backup. Note that this mode can impose read-only I/O pressure equal to a full backup. + - PAGE backup. In this mode, pg_probackup scans all WAL files in the archive from the moment the previous full or incremental backup was taken. Newly created backups contain only the pages that were mentioned in WAL records. This requires all the WAL files since the previous backup to be present in the WAL archive. If the size of these files is comparable to the total size of the database cluster files, the speedup is smaller, but the backup still takes less space. You have to configure WAL archiving as explained in the section [Setting up continuous WAL archiving](#setting-up-continuous-wal-archiving) to make PAGE backups. + - PTRACK backup. In this mode, PostgreSQL tracks page changes on the fly. Continuous archiving is not necessary for it to operate. Each time a relation page is updated, this page is marked in a special PTRACK bitmap for this relation. As one page requires just one bit in the PTRACK fork, such bitmaps are quite small. Tracking implies some minor overhead on the database server operation, but speeds up incremental backups significantly. + +pg_probackup can take only physical online backups, and online backups require WAL for consistent recovery. So regardless of the chosen backup mode (FULL, PAGE or DELTA), any backup taken with pg_probackup must use one of the following `WAL delivery modes`: + +- [ARCHIVE](#archive-mode). Such backups rely on [continuous archiving](#setting-up-continuous-wal-archiving) to ensure consistent recovery. This is the default WAL delivery mode.
+- [STREAM](#stream-mode). Such backups include all the files required to restore the cluster to a consistent state at the time the backup was taken. Regardless of whether [continuous archiving](#setting-up-continuous-wal-archiving) is set up, the WAL segments required for consistent recovery are streamed (hence STREAM) via the replication protocol during backup and included into the backup files. Because of that, backups of this WAL mode are called `autonomous` or `standalone`. + +### Limitations + +pg_probackup currently has the following limitations: + +- Only PostgreSQL versions 9.5 and newer are supported. +- Currently the remote mode of operation is not supported on Windows systems. +- On Unix systems, backup of PostgreSQL versions <= 10 is possible only by the same OS user that the PostgreSQL server is running as. For example, if the PostgreSQL server is running as user *postgres*, then the backup must be run by user *postgres*. If the backup is running in [remote mode](#using-pg_probackup-in-the-remote-mode) using `ssh`, then this limitation applies differently: the value of the `--remote-user` option should be *postgres*. +- During backup of PostgreSQL 9.5, the functions `pg_create_restore_point(text)` and `pg_switch_xlog()` are executed only if the backup role is a superuser. Because of that, backup of a cluster with a low amount of WAL traffic under a non-superuser role may take more time than backup of the same cluster under a superuser role. +- The PostgreSQL server from which the backup was taken and the restored server must be compatible in the [block_size](https://www.postgresql.org/docs/current/runtime-config-preset.html#GUC-BLOCK-SIZE) and [wal_block_size](https://www.postgresql.org/docs/current/runtime-config-preset.html#GUC-WAL-BLOCK-SIZE) parameters and have the same major release number. Also, depending on cluster configuration, PostgreSQL itself may impose additional restrictions, such as CPU architecture platform and libc/libicu versions. +- An incremental chain can span only one timeline. So if you have an incremental backup chain taken from a replica and the replica gets promoted, you will be forced to take another FULL backup. + +## Installation and Setup + +Once you have pg_probackup installed, complete the following setup: + +- Initialize the backup catalog. +- Add a new backup instance to the backup catalog. +- Configure the database cluster to enable pg_probackup backups. +- Optionally, configure SSH for running pg_probackup operations in remote mode. + +### Initializing the Backup Catalog + +pg_probackup stores all WAL and backup files in the corresponding subdirectories of the backup catalog. + +To initialize the backup catalog, run the following command: + + pg_probackup init -B backup_dir + +Where *backup_dir* is the path to the backup catalog. If the *backup_dir* already exists, it must be empty. Otherwise, pg_probackup returns an error. + +The user launching pg_probackup must have full access to the *backup_dir* directory. + +pg_probackup creates the backup_dir backup catalog, with the following subdirectories: + +- wal/ — directory for WAL files. +- backups/ — directory for backup files. + +Once the backup catalog is initialized, you can add a new backup instance. + +### Adding a New Backup Instance + +pg_probackup can store backups for multiple database clusters in a single backup catalog. To set up the required subdirectories, you must add a backup instance to the backup catalog for each database cluster you are going to back up.
+ +To add a new backup instance, run the following command: + + pg_probackup add-instance -B backup_dir -D data_dir --instance instance_name [remote_options] + +Where: + +- *data_dir* is the data directory of the cluster you are going to back up. To set up and use pg_probackup, write access to this directory is required. +- *instance_name* is the name of the subdirectories that will store WAL and backup files for this cluster. +- The optional parameters [remote_options](#remote-mode-options) should be used if *data_dir* is located on remote machine. + +pg_probackup creates the *instance_name* subdirectories under the 'backups/' and 'wal/' directories of the backup catalog. The 'backups/*instance_name*' directory contains the 'pg_probackup.conf' configuration file that controls pg_probackup settings for this backup instance. If you run this command with the [remote_options](#remote-mode-options), used parameters will be added to pg_probackup.conf. + +For details on how to fine-tune pg_probackup configuration, see the section [Configuring pg_probackup](#configuring-pg_probackup). + +The user launching pg_probackup must have full access to *backup_dir* directory and at least read-only access to *data_dir* directory. If you specify the path to the backup catalog in the `BACKUP_PATH` environment variable, you can omit the corresponding option when running pg_probackup commands. + +>NOTE: For PostgreSQL >= 11 it is recommended to use [allow-group-access](https://www.postgresql.org/docs/11/app-initdb.html#APP-INITDB-ALLOW-GROUP-ACCESS) feature, so backup can be done by any OS user in the same group as the cluster owner. In this case the user should have read permissions on the cluster directory. + +### Configuring the Database Cluster + +Although pg_probackup can be used by a superuser, it is recommended to create a separate role with the minimum permissions required for the chosen backup strategy. In these configuration instructions, the *backup* role is used as an example. 
+ +To perform [backup](#backup), the following permissions for role *backup* are required only in database **used for connection** to PostgreSQL server: + +For PostgreSQL 9.5: +``` +BEGIN; +CREATE ROLE backup WITH LOGIN; +GRANT USAGE ON SCHEMA pg_catalog TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_xlog() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup; +COMMIT; +``` + +For PostgreSQL 9.6: +``` +BEGIN; +CREATE ROLE backup WITH LOGIN; +GRANT USAGE ON SCHEMA pg_catalog TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_xlog() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_xlog_replay_location() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_checkpoint() TO backup; +COMMIT; +``` + +For PostgreSQL >= 10: +``` +BEGIN; +CREATE ROLE backup WITH LOGIN; +GRANT USAGE ON SCHEMA pg_catalog TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean, boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_wal() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_wal_replay_lsn() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_checkpoint() TO backup; +COMMIT; +``` + +In the [pg_hba.conf](https://www.postgresql.org/docs/current/auth-pg-hba-conf.html) file, allow connection to database cluster on behalf of the *backup* role. + +Since pg_probackup needs to read cluster files directly, pg_probackup must be started by (in case of remote backup - connected to) OS user that has read access to all files and directories inside the data directory (PGDATA) you are going to back up. + +Depending on whether you are plan to take [autonomous](#stream-mode) and/or [archive](#archive-mode) backups, PostgreSQL cluster configuration will differ, as specified in the sections below. 
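+For reference, the pg_hba.conf entry mentioned above could look like the following sketch; the database name, client address and authentication method are assumptions and must be adjusted to your environment:
+
+```
+# TYPE  DATABASE        USER            ADDRESS                 METHOD
+host    backupdb        backup          192.168.0.0/24          md5
+```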
To back up the database cluster from a standby server, run pg_probackup in remote mode or create PTRACK backups, additional setup is required. + +For details, see the sections [Setting up STREAM Backups](#setting-up-stream-backups), [Setting up continuous WAL archiving](#setting-up-continuous-wal-archiving), [Setting up Backup from Standby](#setting-up-backup-from-standby), [Configuring the Remote Mode](#configuring-the-remote-mode), [Setting up Partial Restore](#setting-up-partial-restore) and [Setting up PTRACK Backups](#setting-up-ptrack-backups). + +### Setting up STREAM Backups + +To set up the cluster for [STREAM](#stream-mode) backups, complete the following steps: + +- Grant the REPLICATION privilege to the backup role: + + ALTER ROLE backup WITH REPLICATION; + +- In the [pg_hba.conf](https://www.postgresql.org/docs/current/auth-pg-hba-conf.html) file, allow replication on behalf of the *backup* role. +- Make sure the parameter [max_wal_senders](https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-MAX-WAL-SENDERS) is set high enough to leave at least one session available for the backup process. +- Set the parameter [wal_level](https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL) to be higher than `minimal`. + +If you are planning to take PAGE backups in STREAM mode or perform PITR with STREAM backups, you still have to configure WAL archiving as explained in the section [Setting up continuous WAL archiving](#setting-up-continuous-wal-archiving). + +Once these steps are complete, you can start taking FULL, PAGE, DELTA and PTRACK backups with [STREAM](#stream-mode) WAL mode. + +### Setting up continuous WAL archiving + +Making backups in PAGE backup mode, performing [PITR](#performing-point-in-time-pitr-recovery) and making backups with [ARCHIVE](#archive-mode) WAL delivery mode require [continuous WAL archiving](https://www.postgresql.org/docs/current/continuous-archiving.html) to be enabled. To set up continuous archiving in the cluster, complete the following steps: + +- Make sure the [wal_level](https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-WAL-LEVEL) parameter is higher than `minimal`. +- If you are configuring archiving on master, [archive_mode](https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-MODE) must be set to `on` or `always`. To perform archiving on standby, set this parameter to `always`. +- Set the [archive_command](https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-COMMAND) parameter, as follows: + + archive_command = 'pg_probackup archive-push -B backup_dir --instance instance_name --wal-file-path=%p --wal-file-name=%f [remote_options]' + +Where *backup_dir* and *instance_name* refer to the already initialized backup catalog instance for this database cluster and optional parameters [remote_options](#remote-mode-options) should be used to archive WAL to the remote host. For details about all possible `archive-push` parameters, see the section [archive-push](#archive-push). + +Once these steps are complete, you can start making backups with [ARCHIVE](#archive-mode) WAL-mode, backups in PAGE backup mode and perform [PITR](#performing-point-in-time-pitr-recovery). + +Current state of WAL Archive can be obtained via [show](#show) command. 
For details, see the section [Viewing WAL Archive Information](#viewing-wal-archive-information). + +If you are planning to make PAGE backups and/or backups with [ARCHIVE](#archive-mode) WAL mode from a standby of a server that generates a small amount of WAL traffic, and you do not want to wait long for a WAL segment to fill up, consider setting the [archive_timeout](https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-TIMEOUT) PostgreSQL parameter **on the master**. It is advisable to set the value of this setting slightly lower than the pg_probackup parameter `--archive-timeout` (default 5 min), so that there is enough time for the rotated segment to be streamed to the replica and sent to the archive before the backup is aborted because of `--archive-timeout`. + +>NOTE: Using the pg_probackup command [archive-push](#archive-push) for continuous archiving is optional. You can use any other tool you like as long as it delivers WAL segments into the '*backup_dir*/wal/*instance_name*' directory. If compression is used, it should be `gzip`, and the '.gz' suffix in the filename is mandatory. + +>NOTE: Instead of the `archive_mode`+`archive_command` method, you may opt to use the utility [pg_receivewal](https://www.postgresql.org/docs/current/app-pgreceivewal.html). In this case the pg_receivewal `-D directory` option should point to the '*backup_dir*/wal/*instance_name*' directory. WAL compression performed by pg_receivewal is supported by pg_probackup. A `Zero Data Loss` archive strategy can be achieved only by using pg_receivewal. + +### Setting up Backup from Standby + +For PostgreSQL 9.6 or higher, pg_probackup can take backups from a standby server. This requires the following additional setup: + +- On the standby server, set the parameter [hot_standby](https://www.postgresql.org/docs/current/runtime-config-replication.html#GUC-HOT-STANDBY) to `on`. +- On the master server, set the parameter [full_page_writes](https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-FULL-PAGE-WRITES) to `on`. +- To perform autonomous backups on the standby, complete all steps in the section [Setting up STREAM Backups](#setting-up-stream-backups). +- To perform archive backups on the standby, complete all steps in the section [Setting up continuous WAL archiving](#setting-up-continuous-wal-archiving). + +Once these steps are complete, you can start taking FULL, PAGE, DELTA or PTRACK backups with the appropriate WAL delivery mode (ARCHIVE or STREAM) from the standby server. + +Backup from the standby server has the following limitations: + +- If the standby is promoted to the master during backup, the backup fails. +- All WAL records required for the backup must contain sufficient full-page writes. This requires you to enable `full_page_writes` on the master, and not to use tools like pg_compresslog as [archive_command](https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-COMMAND) to remove full-page writes from WAL files. + +### Setting up Cluster Verification + +Logical verification of a database cluster requires the following additional setup.
Role *backup* is used as an example: + +- Install extension [amcheck](https://www.postgresql.org/docs/current/amcheck.html) or [amcheck_next](https://github.com/petergeoghegan/amcheck) **in every database** of the cluster: + + CREATE EXTENSION amcheck; + +- To perform logical verification, the following permissions are required **in every database** of the cluster: + +``` +GRANT SELECT ON TABLE pg_catalog.pg_am TO backup; +GRANT SELECT ON TABLE pg_catalog.pg_class TO backup; +GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; +GRANT SELECT ON TABLE pg_catalog.pg_namespace TO backup; +GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; +GRANT EXECUTE ON FUNCTION bt_index_check(oid) TO backup; +GRANT EXECUTE ON FUNCTION bt_index_check(oid, bool) TO backup; +``` + +### Setting up Partial Restore + +If you are planning to use partial restore, complete the following additional step: + +- Grant read-only access to 'pg_catalog.pg_database' to the *backup* role, only in the database **used for connection** to the PostgreSQL server: + + GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; + +### Configuring the Remote Mode + +pg_probackup supports the remote mode that allows you to perform backup, restore and WAL archiving operations remotely. In this mode, the backup catalog is stored on a local system, while the PostgreSQL instance to back up and/or restore is located on a remote system. Currently the only supported remote protocol is SSH. + +#### Setup SSH + +If you are going to use pg_probackup in remote mode via ssh, complete the following steps: + +- Install pg_probackup on both systems: `backup_host` and `db_host`. +- For communication between the hosts, set up a passwordless SSH connection between the *backup* user on `backup_host` and the *postgres* user on `db_host`: + + [backup@backup_host] ssh-copy-id postgres@db_host + +- If you are planning to rely on [continuous WAL archiving](#setting-up-continuous-wal-archiving), then set up a passwordless SSH connection between the *postgres* user on `db_host` and the *backup* user on `backup_host`: + + [postgres@db_host] ssh-copy-id backup@backup_host + +Where: + +- *backup_host* is the system with the *backup catalog*. +- *db_host* is the system with the PostgreSQL cluster. +- *backup* is the OS user on *backup_host* used to run pg_probackup. +- *postgres* is the OS user on *db_host* used to run the PostgreSQL cluster. Note that for PostgreSQL versions >= 11, a more secure approach can be used thanks to the [allow-group-access](https://www.postgresql.org/docs/11/app-initdb.html#APP-INITDB-ALLOW-GROUP-ACCESS) feature. + +pg_probackup in remote mode via `ssh` works as follows: + +- only the following commands can be launched in remote mode: [add-instance](#add-instance), [backup](#backup), [restore](#restore), [archive-push](#archive-push), [archive-get](#archive-get). +- when started in remote mode, the main pg_probackup process on the local system connects via ssh to the remote system and launches there a number of agent processes equal to the specified value of the `-j/--threads` option. +- the main pg_probackup process uses the remote agents to access remote files and transfer data between the local and remote systems. +- remote agents are smart and capable of handling some logic on their own to minimize the network traffic and the number of round-trips between hosts.
+- usually the main process is started on *backup_host* and connects to *db_host*, but in the case of the `archive-push` and `archive-get` commands the main process is started on *db_host* and connects to *backup_host*. +- after completion of the data transfer the remote agents are terminated and the ssh connections are closed. +- if an error condition is encountered by a remote agent, then all agents are terminated and error details are reported by the main pg_probackup process, which exits with an error. +- compression is always done on *db_host*. +- decompression is always done on *backup_host*. + +>NOTE: You can impose [additional restrictions](https://man.openbsd.org/OpenBSD-current/man8/sshd.8#AUTHORIZED_KEYS_FILE_FORMAT) on ssh settings to protect the system in the event of account compromise. + +### Setting up PTRACK Backups + +Backup mode PTRACK can be used only on Postgrespro Standard and Postgrespro Enterprise installations or patched vanilla PostgreSQL. Links to ptrack patches can be found [here](https://github.com/postgrespro/pg_probackup#ptrack-support). + +If you are going to use PTRACK backups, complete the following additional steps: + +- Set the parameter `ptrack_enable` to `on`. +- Grant the rights to execute `ptrack` functions to the *backup* role **in every database** of the cluster: + + GRANT EXECUTE ON FUNCTION pg_catalog.pg_ptrack_clear() TO backup; + GRANT EXECUTE ON FUNCTION pg_catalog.pg_ptrack_get_and_clear(oid, oid) TO backup; + +- The *backup* role must have access to all the databases of the cluster. + +## Usage + +### Creating a Backup + +To create a backup, run the following command: + + pg_probackup backup -B backup_dir --instance instance_name -b backup_mode + +Where *backup_mode* can take one of the following values: + +- FULL — creates a full backup that contains all the data files of the cluster to be restored. +- DELTA — reads all data files in the data directory and creates an incremental backup for pages that have changed since the previous backup. +- PAGE — creates an incremental PAGE backup based on the WAL files that have been generated since the previous full or incremental backup was taken. Only changed blocks are read from data files. +- PTRACK — creates an incremental PTRACK backup tracking page changes on the fly. + +When restoring a cluster from an incremental backup, pg_probackup relies on the parent full backup and all the incremental backups between that full backup and the chosen one; together they are called `the backup chain`. You must create at least one full backup before taking incremental ones. + +#### ARCHIVE mode + +ARCHIVE is the default WAL delivery mode. + +For example, to make a FULL backup in ARCHIVE mode, run: + + pg_probackup backup -B backup_dir --instance instance_name -b FULL + +Unlike backups in STREAM mode, ARCHIVE backups rely on [continuous archiving](#setting-up-continuous-wal-archiving) to provide the WAL segments required to restore the cluster to a consistent state at the time the backup was taken. + +During [backup](#backup) pg_probackup ensures that WAL files containing WAL records between START LSN and STOP LSN actually exist in the '*backup_dir*/wal/*instance_name*' directory. pg_probackup also ensures that WAL records between START LSN and STOP LSN can be parsed. These precautions eliminate the risk of silent WAL corruption. + +#### STREAM mode + +STREAM is the optional WAL delivery mode.
For example, to make a FULL backup in STREAM mode, add the `--stream` flag to the command from the previous example: + + pg_probackup backup -B backup_dir --instance instance_name -b FULL --stream --temp-slot + +The optional `--temp-slot` flag ensures that the required segments remain available if the WAL is rotated before the backup is complete. + +Unlike backups in ARCHIVE mode, STREAM backups include all the WAL segments required to restore the cluster to a consistent state at the time the backup was taken. + +During [backup](#backup) pg_probackup streams WAL files containing WAL records between START LSN and STOP LSN to the '*backup_dir*/backups/*instance_name*/*BACKUP ID*/database/pg_wal' directory. pg_probackup also ensures that WAL records between START LSN and STOP LSN can be parsed. These precautions eliminate the risk of silent WAL corruption. + +Even if you are using [continuous archiving](#setting-up-continuous-wal-archiving), STREAM backups can still be useful in the following cases: + +- STREAM backups can be restored on a server that has no file access to the WAL archive. +- STREAM backups enable you to restore the cluster state at a point in time for which WAL files in the archive are no longer available. +- A backup in STREAM mode can be taken from a standby of a server that generates a small amount of WAL traffic, without a long wait for a WAL segment to fill up. + +#### Page validation + +If [data checksums](https://www.postgresql.org/docs/current/runtime-config-preset.html#GUC-DATA-CHECKSUMS) are enabled in the database cluster, pg_probackup uses this information to check the correctness of data files during backup. While reading each page, pg_probackup checks whether the calculated checksum coincides with the checksum stored in the page header. This guarantees that the PostgreSQL instance and the backup itself are free of corrupted pages. +Note that pg_probackup reads database files directly from the filesystem, so under heavy write load during backup it can show false positive checksum failures because of partial writes. In case of a page checksum mismatch, the page is read again and the checksum comparison is repeated. + +A page is considered corrupted if the checksum comparison fails more than 100 times; in this case the backup is aborted. + +Regardless of whether data checksums are enabled, pg_probackup always checks page header "sanity". + +#### External directories + +To back up a directory located outside of the data directory, use the optional `--external-dirs` parameter that specifies the path to this directory. If you would like to add more than one external directory, provide several paths separated by colons; on Windows systems, paths must be separated by semicolons instead. + +For example, to include the `'/etc/dir1/'` and `'/etc/dir2/'` directories into the full backup of your *instance_name* instance that will be stored under the *backup_dir* directory, run: + + pg_probackup backup -B backup_dir --instance instance_name -b FULL --external-dirs=/etc/dir1:/etc/dir2 + +For example, to include the `'C:\dir1\'` and `'C:\dir2\'` directories into the full backup of your *instance_name* instance that will be stored under the *backup_dir* directory on a Windows system, run: + + pg_probackup backup -B backup_dir --instance instance_name -b FULL --external-dirs=C:\dir1;C:\dir2 + +pg_probackup creates a separate subdirectory in the backup directory for each external directory.
Since external directories included into different backups do not have to be the same, when you are restoring the cluster from an incremental backup, only those directories that belong to this particular backup will be restored. Any external directories stored in the previous backups will be ignored. + +To include the same directories into each backup of your instance, you can specify them in the pg_probackup.conf configuration file using the [set-config](#set-config) command with the `--external-dirs` option. + +### Verifying a Cluster + +To verify that the PostgreSQL database cluster is free of corruption, run the following command: + + pg_probackup checkdb [-B backup_dir [--instance instance_name]] [-D data_dir] + +This physical verification works similarly to [page validation](#page-validation) done during backup, with several differences: + +- `checkdb` is read-only +- if a corrupted page is detected, `checkdb` is not aborted, but carries on until all pages in the cluster are validated +- `checkdb` does not strictly require *the backup catalog*, so it can be used to verify database clusters that are **not** [added to the backup catalog](#adding-a-new-backup-instance). + +If *backup_dir* and *instance_name* are omitted, then [connection options](#connection-options) and *data_dir* must be provided via environment variables or command-line options. + +Physical verification cannot detect logical inconsistencies, missing and nullified blocks or entire files, repercussions from PostgreSQL bugs and other wicked anomalies. +Extensions [amcheck](https://www.postgresql.org/docs/current/amcheck.html) and [amcheck_next](https://github.com/petergeoghegan/amcheck) provide a partial solution to these problems. + +If you would like, in addition to physical verification, to verify all indexes in all databases using these extensions, you can specify the `--amcheck` flag when running the [checkdb](#checkdb) command: + + pg_probackup checkdb -D data_dir --amcheck + +Physical verification can be skipped if the `--skip-block-validation` flag is used. For logical-only verification, *backup_dir* and *data_dir* are optional; only [connection options](#connection-options) are mandatory: + + pg_probackup checkdb --amcheck --skip-block-validation {connection_options} + +Logical verification can be done more thoroughly with the `--heapallindexed` flag, which checks that all heap tuples that should be indexed are actually indexed, but at a higher cost of CPU, memory and I/O consumption. + +### Validating a Backup + +pg_probackup calculates checksums for each file in a backup during the backup process. The process of checking the checksums of backup data files is called `the backup validation`. By default, validation is run immediately after a backup is taken and right before a restore, to detect possible backup corruption. + +If you would like to skip backup validation, you can specify the `--no-validate` flag when running the [backup](#backup) and [restore](#restore) commands. + +To ensure that all the required backup files are present and can be used to restore the database cluster, you can run the [validate](#validate) command with the exact [recovery target options](#recovery-target-options) you are going to use for recovery.
For example, to check that you can restore the database cluster from a backup copy up to the specified transaction ID, run this command: + + pg_probackup validate -B backup_dir --instance instance_name --recovery-target-xid=4242 + +If validation completes successfully, pg_probackup displays the corresponding message. If validation fails, you will receive an error message with the exact time, transaction ID and LSN up to which the recovery is possible. + +If you specify *backup_id* via the `-i/--backup-id` option, then only the backup copy with the specified backup ID will be validated. If *backup_id* is specified together with [recovery target options](#recovery-target-options), then validate will check whether it is possible to restore the specified backup to the specified `recovery target`. + +For example, to check that you can restore the database cluster from a backup copy with *backup_id* up to the specified timestamp, run this command: + + pg_probackup validate -B backup_dir --instance instance_name -i PT8XFX --recovery-target-time='2017-05-18 14:18:11+03' + +If *backup_id* belongs to an incremental backup, then all its parents, starting from the FULL backup, will be validated. + +If you omit all the parameters, all backups are validated. + +### Restoring a Cluster + +To restore the database cluster from a backup, run the restore command with at least the following options: + + pg_probackup restore -B backup_dir --instance instance_name -i backup_id + +Where: + +- *backup_dir* is the backup catalog that stores all backup files and meta information. +- *instance_name* is the backup instance for the cluster to be restored. +- *backup_id* specifies the backup to restore the cluster from. If you omit this option, pg_probackup uses the latest valid backup available for the specified instance. If you specify an incremental backup to restore, pg_probackup automatically restores the underlying full backup and then sequentially applies all the necessary increments. + +If the cluster to restore contains tablespaces, pg_probackup restores them to their original location by default. To restore tablespaces to a different location, use the `--tablespace-mapping/-T` option. Otherwise, restoring the cluster on the same host will fail if tablespaces are in use, because the backup would have to be written to the same directories. + +When using the `--tablespace-mapping/-T` option, you must provide absolute paths to the old and new tablespace directories. If a path happens to contain an equals sign (=), escape it with a backslash. This option can be specified multiple times for multiple tablespaces. For example: + + pg_probackup restore -B backup_dir --instance instance_name -D data_dir -j 4 -i backup_id -T tablespace1_dir=tablespace1_newdir -T tablespace2_dir=tablespace2_newdir + +Once the restore command is complete, start the database service. + +If you are restoring a STREAM backup, the restore is complete at once, with the cluster returned to a self-consistent state at the point when the backup was taken. For ARCHIVE backups, PostgreSQL replays all available archived WAL segments, so the cluster is restored to the latest state possible. You can change this behavior by using the [recovery target options](#recovery-target-options) with the `restore` command. Note that using the [recovery target options](#recovery-target-options) when restoring a STREAM backup is possible only if the WAL archive is available at least starting from the time the STREAM backup was taken.
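+As noted above, once the restore command completes, you start the database service as usual. A minimal sketch, assuming a manually managed instance whose restored data directory is *data_dir* (packaged installations typically start the server through a service manager instead):
+
+    pg_ctl -D data_dir -w start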
To restore the cluster on a remote host, see the section [Using pg_probackup in the Remote Mode](#using-pg-probackup-in-the-remote-mode). + +>NOTE: By default, the [restore](#restore) command validates the specified backup before restoring the cluster. If you run regular backup validations and would like to save time when restoring the cluster, you can specify the `--no-validate` flag to skip validation and speed up the recovery. + +#### Partial Restore + +If you have enabled [partial restore](#setting-up-partial-restore) before taking backups, you can restore or exclude from restore an arbitrary number of specific databases using [partial restore options](#partial-restore-options) with the [restore](#restore) command. + +To restore only one or more databases, run the restore command with the following options: + + pg_probackup restore -B backup_dir --instance instance_name --db-include=database_name + +The option `--db-include` can be specified multiple times. For example, to restore only databases `db1` and `db2`, run the following command: + + pg_probackup restore -B backup_dir --instance instance_name --db-include=db1 --db-include=db2 + +To exclude one or more specific databases from restore, run the restore command with the following options: + + pg_probackup restore -B backup_dir --instance instance_name --db-exclude=database_name + +The option `--db-exclude` can be specified multiple times. For example, to exclude the databases `db1` and `db2` from restore, run the following command: + + pg_probackup restore -B backup_dir --instance instance_name -i backup_id --db-exclude=db1 --db-exclude=db2 + +Partial restore relies on the lax behaviour of the PostgreSQL recovery process toward truncated files. Files of the excluded databases are restored as zero-sized files, allowing recovery to work properly. After the PostgreSQL cluster has started successfully, you must drop the excluded databases using the `DROP DATABASE` command. + +>NOTE: The databases `template0` and `template1` are always restored. + +### Performing Point-in-Time (PITR) Recovery + +If you have enabled [continuous WAL archiving](#setting-up-continuous-wal-archiving) before taking backups, you can restore the cluster to its state at an arbitrary point in time (recovery target) using [recovery target options](#recovery-target-options) with the [restore](#restore) and [validate](#validate) commands. + +If the `-i/--backup-id` option is omitted, pg_probackup automatically chooses the backup that is the closest to the specified recovery target and starts the restore process; otherwise pg_probackup will try to restore *backup_id* to the specified recovery target. + +- To restore the cluster state at the exact time, specify the `--recovery-target-time` option, in the timestamp format.
For example: + + pg_probackup restore -B backup_dir --instance instance_name --recovery-target-time='2017-05-18 14:18:11+03' + +- To restore the cluster state up to a specific transaction ID, use the `--recovery-target-xid` option: + + pg_probackup restore -B backup_dir --instance instance_name --recovery-target-xid=687 + +- To restore the cluster state up to a specific LSN, use the `--recovery-target-lsn` option: + + pg_probackup restore -B backup_dir --instance instance_name --recovery-target-lsn=16/B374D848 + +- To restore the cluster state up to a specific named restore point, use the `--recovery-target-name` option: + + pg_probackup restore -B backup_dir --instance instance_name --recovery-target-name='before_app_upgrade' + +- To restore the backup to the latest state available in the archive, use the `--recovery-target` option with the `latest` value: + + pg_probackup restore -B backup_dir --instance instance_name --recovery-target='latest' + +- To restore the cluster to the earliest point of consistency, use the `--recovery-target` option with the `immediate` value: + + pg_probackup restore -B backup_dir --instance instance_name --recovery-target='immediate' + +### Using pg_probackup in the Remote Mode + +pg_probackup supports the remote mode that allows you to perform `backup` and `restore` operations remotely via SSH. In this mode, the backup catalog is stored on a local system, while the PostgreSQL instance to be backed up is located on a remote system. You must have pg_probackup installed on both systems. + +Do note that pg_probackup relies on a passwordless SSH connection for communication between the hosts. + +The typical workflow is as follows: + +- On your backup host, configure pg_probackup as explained in the section [Installation and Setup](#installation-and-setup). For the [add-instance](#add-instance) and [set-config](#set-config) commands, make sure to specify [remote options](#remote-mode-options) that point to the database host with the PostgreSQL instance. + +- If you would like to take remote backups in [PAGE](#creating-a-backup) mode, or rely on [ARCHIVE](#archive-mode) WAL delivery mode, or use [PITR](#performing-point-in-time-pitr-recovery), then configure continuous WAL archiving from the database host to the backup host as explained in the section [Setting up continuous WAL archiving](#setting-up-continuous-wal-archiving). For the [archive-push](#archive-push) and [archive-get](#archive-get) commands, you must specify the [remote options](#remote-mode-options) that point to the backup host with the backup catalog. + +- Run [backup](#backup) or [restore](#restore) commands with [remote options](#remote-mode-options) **on the backup host**. pg_probackup connects to the remote system via SSH and creates a backup locally or restores the previously taken backup on the remote system, respectively.
For example, to create a full backup in ARCHIVE mode, using the remote mode through an SSH connection to user `postgres` on the host with address `192.168.0.2` via port `2302`, run: + + pg_probackup backup -B backup_dir --instance instance_name -b FULL --remote-user=postgres --remote-host=192.168.0.2 --remote-port=2302 + +For example, to restore the latest backup on the remote system using the remote mode through an SSH connection to user `postgres` on the host with address `192.168.0.2` via port `2302`, run: + + pg_probackup restore -B backup_dir --instance instance_name --remote-user=postgres --remote-host=192.168.0.2 --remote-port=2302 + +Restoring an ARCHIVE backup or performing PITR in the remote mode requires additional information: the destination address, port and username for establishing an ssh connection **from** the host with the database **to** the host with the backup catalog. This information will be used by `restore_command` to copy WAL segments via ssh from the archive to the PostgreSQL 'pg_wal' directory. + +To provide this information, you can use the [Remote WAL Archive Options](#remote-wal-archive-options). + +For example, to restore the latest backup on the remote system using the remote mode through an SSH connection to user `postgres` on the host with address `192.168.0.2` via port `2302` and user `backup` on the backup catalog host with address `192.168.0.3` via port `2303`, run: + + pg_probackup restore -B backup_dir --instance instance_name --remote-user=postgres --remote-host=192.168.0.2 --remote-port=2302 --archive-host=192.168.0.3 --archive-port=2303 --archive-user=backup + +The provided arguments will be used to construct the 'restore_command' in recovery.conf: +``` +# recovery.conf generated by pg_probackup 2.1.5 +restore_command = 'pg_probackup archive-get -B backup_dir --instance instance_name --wal-file-path=%p --wal-file-name=%f --remote-host=192.168.0.3 --remote-port=2303 --remote-user=backup' +``` + +Alternatively, you can use the `--restore-command` option to provide the entire 'restore_command': + + pg_probackup restore -B backup_dir --instance instance_name --remote-user=postgres --remote-host=192.168.0.2 --remote-port=2302 --restore-command='pg_probackup archive-get -B backup_dir --instance instance_name --wal-file-path=%p --wal-file-name=%f --remote-host=192.168.0.3 --remote-port=2303 --remote-user=backup' + +>NOTE: The remote backup mode is currently unavailable for Windows systems. + +### Running pg_probackup on Parallel Threads + +[Backup](#backup), [restore](#restore), [merge](#merge), [delete](#delete), [checkdb](#checkdb) and [validate](#validate) processes can be executed on several parallel threads. This can significantly speed up pg_probackup operation given enough resources (CPU cores, disk and network bandwidth). + +Parallel execution is controlled by the `-j/--threads` command line option. For example, to create a backup using four parallel threads, run: + + pg_probackup backup -B backup_dir --instance instance_name -b FULL -j 4 + +>NOTE: Parallel restore applies only to copying data from the backup catalog to the data directory of the cluster. When the PostgreSQL server is started, WAL records need to be replayed, and this cannot be done in parallel. + +### Configuring pg_probackup + +Once the backup catalog is initialized and a new backup instance is added, you can use the pg_probackup.conf configuration file located in the '*backup_dir*/backups/*instance_name*' directory to fine-tune pg_probackup configuration. + +For example, the [backup](#backup) and [checkdb](#checkdb) commands use a regular PostgreSQL connection.
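+Such a connection can also be specified explicitly with [connection options](#connection-options) on the command line; the host, port, user and database values below are placeholders:
+
+    pg_probackup backup -B backup_dir --instance instance_name -b DELTA -h localhost -p 5432 -U backup -d backupdb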
To avoid specifying these options each time on the command line, you can set them in the pg_probackup.conf configuration file using the [set-config](#set-config) command. + +>NOTE: It is **not recommended** to edit pg_probackup.conf manually. + +Initially, pg_probackup.conf contains the following settings: + +- PGDATA — the path to the data directory of the cluster to back up. +- system-identifier — the unique identifier of the PostgreSQL instance. + +Additionally, you can define [remote](#remote-mode-options), [retention](#retention-options), [logging](#logging-options) and [compression](#compression-options) settings using the `set-config` command: + + pg_probackup set-config -B backup_dir --instance instance_name + [--external-dirs=external_directory_path] [remote_options] [connection_options] [retention_options] [logging_options] + +To view the current settings, run the following command: + + pg_probackup show-config -B backup_dir --instance instance_name + +You can override the settings defined in pg_probackup.conf when running pg_probackups [commands](#commands) via corresponding environment variables and/or command line options. + +### Specifying Connection Settings + +If you define connection settings in the 'pg_probackup.conf' configuration file, you can omit connection options in all the subsequent pg_probackup commands. However, if the corresponding environment variables are set, they get higher priority. The options provided on the command line overwrite both environment variables and configuration file settings. + +If nothing is given, the default values are taken. By default pg_probackup tries to use local connection via Unix domain socket (localhost on Windows) and tries to get the database name and the user name from the PGUSER environment variable or the current OS user name. + +### Managing the Backup Catalog + +With pg_probackup, you can manage backups from the command line: + +- [View backup information](#viewing-backup-information) +- [View WAL Archive Information](#viewing-wal-archive-information) +- [Validate backups](#validating-a-backup) +- [Merge backups](#merging-backups) +- [Delete backups](#deleting-backups) + +#### Viewing Backup Information + +To view the list of existing backups for every instance, run the command: + + pg_probackup show -B backup_dir + +pg_probackup displays the list of all the available backups. For example: + +``` +BACKUP INSTANCE 'node' +====================================================================================================================================== + Instance Version ID Recovery time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status +====================================================================================================================================== + node 10 PYSUE8 2019-10-03 15:51:48+03 FULL ARCHIVE 1/0 16s 9047kB 16MB 4.31 0/12000028 0/12000160 OK + node 10 P7XDQV 2018-04-29 05:32:59+03 DELTA STREAM 1/1 11s 19MB 16MB 1.00 0/15000060 0/15000198 OK + node 10 P7XDJA 2018-04-29 05:28:36+03 PTRACK STREAM 1/1 21s 32MB 32MB 1.00 0/13000028 0/13000198 OK + node 10 P7XDHU 2018-04-29 05:27:59+03 PAGE STREAM 1/1 15s 33MB 16MB 1.00 0/11000028 0/110001D0 OK + node 10 P7XDHB 2018-04-29 05:27:15+03 FULL STREAM 1/0 11s 39MB 16MB 1.00 0/F000028 0/F000198 OK +``` + +For each backup, the following information is provided: + +- Instance — the instance name. +- Version — PostgreSQL major version. +- ID — the backup identifier. 
+- Recovery time — the earliest moment for which you can restore the state of the database cluster. +- Mode — the method used to take this backup. Possible values: FULL, PAGE, DELTA, PTRACK. +- WAL Mode — the WAL delivery mode. Possible values: STREAM and ARCHIVE. +- TLI — timeline identifiers of current backup and its parent. +- Time — the time it took to perform the backup. +- Data — the size of the data files in this backup. This value does not include the size of WAL files. In case of STREAM backup the total size of backup can be calculated as 'Data' + 'WAL'. +- WAL — the uncompressed size of WAL files required to apply by PostgreSQL recovery process to reach consistency. +- Zratio — compression ratio calculated as 'uncompressed-bytes' / 'data-bytes'. +- Start LSN — WAL log sequence number corresponding to the start of the backup process. REDO point for PostgreSQL recovery process to start from. +- Stop LSN — WAL log sequence number corresponding to the end of the backup process. Consistency point for PostgreSQL recovery process. +- Status — backup status. Possible values: + + - OK — the backup is complete and valid. + - DONE — the backup is complete, but was not validated. + - RUNNING — the backup is in progress. + - MERGING — the backup is being merged. + - DELETING — the backup files are being deleted. + - CORRUPT — some of the backup files are corrupted. + - ERROR — the backup was aborted because of an unexpected error. + - ORPHAN — the backup is invalid because one of its parent backups is corrupt or missing. + +You can restore the cluster from the backup only if the backup status is OK or DONE. + +To get more detailed information about the backup, run the show with the backup ID: + + pg_probackup show -B backup_dir --instance instance_name -i backup_id + +The sample output is as follows: + +``` +#Configuration +backup-mode = FULL +stream = false +compress-alg = zlib +compress-level = 1 +from-replica = false + +#Compatibility +block-size = 8192 +wal-block-size = 8192 +checksum-version = 1 +program-version = 2.1.3 +server-version = 10 + +#Result backup info +timelineid = 1 +start-lsn = 0/04000028 +stop-lsn = 0/040000f8 +start-time = '2017-05-16 12:57:29' +end-time = '2017-05-16 12:57:31' +recovery-xid = 597 +recovery-time = '2017-05-16 12:57:31' +expire-time = '2020-05-16 12:57:31' +data-bytes = 22288792 +wal-bytes = 16777216 +uncompressed-bytes = 39961833 +pgdata-bytes = 39859393 +status = OK +parent-backup-id = 'PT8XFX' +primary_conninfo = 'user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any' +``` + +Detailed output has additional attributes: +- compress-alg — compression algorithm used during backup. Possible values: 'zlib', 'pglz', 'none'. +- compress-level — compression level used during backup. +- from-replica — the fact that backup was taken from standby server. Possible values: '1', '0'. +- block-size — (block_size)[https://www.postgresql.org/docs/current/runtime-config-preset.html#GUC-BLOCK-SIZE] setting of PostgreSQL cluster at the moment of backup start. +- wal-block-size — (wal_block_size)[https://www.postgresql.org/docs/current/runtime-config-preset.html#GUC-WAL-BLOCK-SIZE] setting of PostgreSQL cluster at the moment of backup start. +- checksum-version — the fact that PostgreSQL cluster, from which backup is taken, has enabled [data block checksumms](https://www.postgresql.org/docs/current/runtime-config-preset.html#GUC-DATA-CHECKSUMS). 
Possible values: '1', '0'. +- program-version — full version of pg_probackup binary used to create backup. +- start-time — the backup starting time. +- end-time — the backup ending time. +- expire-time — if the backup was pinned, then until this point in time the backup cannot be removed by retention purge. +- uncompressed-bytes — size of the data files before adding page headers and applying compression. You can evaluate the effectiveness of compression by comparing 'uncompressed-bytes' to 'data-bytes' if compression if used. +- pgdata-bytes — size of the PostgreSQL cluster data files at the time of backup. You can evaluate the effectiveness of incremental backup by comparing 'pgdata-bytes' to 'uncompressed-bytes'. +- recovery-xid — current transaction id at the moment of backup ending. +- parent-backup-id — backup ID of parent backup. Available only for incremental backups. +- primary_conninfo — libpq conninfo used for connection to PostgreSQL cluster during backup. The password is not included. + +To get more detailed information about the backup in json format, run the show with the backup ID: + + pg_probackup show -B backup_dir --instance instance_name --format=json -i backup_id + +The sample output is as follows: + +``` +[ + { + "instance": "node", + "backups": [ + { + "id": "PT91HZ", + "parent-backup-id": "PT8XFX", + "backup-mode": "DELTA", + "wal": "ARCHIVE", + "compress-alg": "zlib", + "compress-level": 1, + "from-replica": false, + "block-size": 8192, + "xlog-block-size": 8192, + "checksum-version": 1, + "program-version": "2.1.3", + "server-version": "10", + "current-tli": 16, + "parent-tli": 2, + "start-lsn": "0/8000028", + "stop-lsn": "0/8000160", + "start-time": "2019-06-17 18:25:11+03", + "end-time": "2019-06-17 18:25:16+03", + "recovery-xid": 0, + "recovery-time": "2019-06-17 18:25:15+03", + "data-bytes": 106733, + "wal-bytes": 16777216, + "primary_conninfo": "user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any", + "status": "OK" + } + ] + } +] +``` + +#### Viewing WAL Archive Information + +To view the information about WAL archive for every instance, run the command: + + pg_probackup show -B backup_dir [--instance instance_name] --archive + +pg_probackup displays the list of all the available WAL files grouped by timelines. For example: + +``` +ARCHIVE INSTANCE 'node' +=================================================================================================================== + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status +=================================================================================================================== + 5 1 0/B000000 000000000000000B 000000000000000C 2 685kB 48.00 0 OK + 4 3 0/18000000 0000000000000018 000000000000001A 3 648kB 77.00 0 OK + 3 2 0/15000000 0000000000000015 0000000000000017 3 648kB 77.00 0 OK + 2 1 0/B000108 000000000000000B 0000000000000015 5 892kB 94.00 1 DEGRADED + 1 0 0/0 0000000000000001 000000000000000A 10 8774kB 19.00 1 OK + +``` + +For each backup, the following information is provided: + +- TLI — timeline identifier. +- Parent TLI — identifier of timeline TLI branched off. +- Switchpoint — LSN of the moment when the timeline branched off from "Parent TLI". +- Min Segno — number of the first existing WAL segment belonging to the timeline. +- Max Segno — number of the last existing WAL segment belonging to the timeline. +- N segments — number of WAL segments belonging to the timeline. 
+- Size — the size files take on disk. +- Zratio — compression ratio calculated as 'N segments' * wal_seg_size / 'Size'. +- N backups — number of backups belonging to the timeline. To get the details about backups, use json format. +- Status — archive status for this exact timeline. Possible values: + - OK — all WAL segments between Min and Max are present. + - DEGRADED — some WAL segments between Min and Max are lost. To get details about lost files, use json format. + +To get more detailed information about the WAL archive in json format, run the command: + + pg_probackup show -B backup_dir [--instance instance_name] --archive --format=json + +The sample output is as follows: + +``` +[ + { + "instance": "replica", + "timelines": [ + { + "tli": 5, + "parent-tli": 1, + "switchpoint": "0/B000000", + "min-segno": "000000000000000B", + "max-segno": "000000000000000C", + "n-segments": 2, + "size": 685320, + "zratio": 48.00, + "closest-backup-id": "PXS92O", + "status": "OK", + "lost-segments": [], + "backups": [] + }, + { + "tli": 4, + "parent-tli": 3, + "switchpoint": "0/18000000", + "min-segno": "0000000000000018", + "max-segno": "000000000000001A", + "n-segments": 3, + "size": 648625, + "zratio": 77.00, + "closest-backup-id": "PXS9CE", + "status": "OK", + "lost-segments": [], + "backups": [] + }, + { + "tli": 3, + "parent-tli": 2, + "switchpoint": "0/15000000", + "min-segno": "0000000000000015", + "max-segno": "0000000000000017", + "n-segments": 3, + "size": 648911, + "zratio": 77.00, + "closest-backup-id": "PXS9CE", + "status": "OK", + "lost-segments": [], + "backups": [] + }, + { + "tli": 2, + "parent-tli": 1, + "switchpoint": "0/B000108", + "min-segno": "000000000000000B", + "max-segno": "0000000000000015", + "n-segments": 5, + "size": 892173, + "zratio": 94.00, + "closest-backup-id": "PXS92O", + "status": "DEGRADED", + "lost-segments": [ + { + "begin-segno": "000000000000000D", + "end-segno": "000000000000000E" + }, + { + "begin-segno": "0000000000000010", + "end-segno": "0000000000000012" + } + ], + "backups": [ + { + "id": "PXS9CE", + "backup-mode": "FULL", + "wal": "ARCHIVE", + "compress-alg": "none", + "compress-level": 1, + "from-replica": "false", + "block-size": 8192, + "xlog-block-size": 8192, + "checksum-version": 1, + "program-version": "2.1.5", + "server-version": "10", + "current-tli": 2, + "parent-tli": 0, + "start-lsn": "0/C000028", + "stop-lsn": "0/C000160", + "start-time": "2019-09-13 21:43:26+03", + "end-time": "2019-09-13 21:43:30+03", + "recovery-xid": 0, + "recovery-time": "2019-09-13 21:43:29+03", + "data-bytes": 104674852, + "wal-bytes": 16777216, + "primary_conninfo": "user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any", + "status": "OK" + } + ] + }, + { + "tli": 1, + "parent-tli": 0, + "switchpoint": "0/0", + "min-segno": "0000000000000001", + "max-segno": "000000000000000A", + "n-segments": 10, + "size": 8774805, + "zratio": 19.00, + "closest-backup-id": "", + "status": "OK", + "lost-segments": [], + "backups": [ + { + "id": "PXS92O", + "backup-mode": "FULL", + "wal": "ARCHIVE", + "compress-alg": "none", + "compress-level": 1, + "from-replica": "true", + "block-size": 8192, + "xlog-block-size": 8192, + "checksum-version": 1, + "program-version": "2.1.5", + "server-version": "10", + "current-tli": 1, + "parent-tli": 0, + "start-lsn": "0/4000028", + "stop-lsn": "0/6000028", + "start-time": "2019-09-13 21:37:36+03", + "end-time": "2019-09-13 21:38:45+03", + "recovery-xid": 0, + "recovery-time": "2019-09-13 
21:37:30+03", + "data-bytes": 25987319, + "wal-bytes": 50331648, + "primary_conninfo": "user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any", + "status": "OK" + } + ] + } + ] + }, + { + "instance": "master", + "timelines": [ + { + "tli": 1, + "parent-tli": 0, + "switchpoint": "0/0", + "min-segno": "0000000000000001", + "max-segno": "000000000000000B", + "n-segments": 11, + "size": 8860892, + "zratio": 20.00, + "status": "OK", + "lost-segments": [], + "backups": [ + { + "id": "PXS92H", + "parent-backup-id": "PXS92C", + "backup-mode": "PAGE", + "wal": "ARCHIVE", + "compress-alg": "none", + "compress-level": 1, + "from-replica": "false", + "block-size": 8192, + "xlog-block-size": 8192, + "checksum-version": 1, + "program-version": "2.1.5", + "server-version": "10", + "current-tli": 1, + "parent-tli": 1, + "start-lsn": "0/4000028", + "stop-lsn": "0/50000B8", + "start-time": "2019-09-13 21:37:29+03", + "end-time": "2019-09-13 21:37:31+03", + "recovery-xid": 0, + "recovery-time": "2019-09-13 21:37:30+03", + "data-bytes": 1328461, + "wal-bytes": 33554432, + "primary_conninfo": "user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any", + "status": "OK" + }, + { + "id": "PXS92C", + "backup-mode": "FULL", + "wal": "ARCHIVE", + "compress-alg": "none", + "compress-level": 1, + "from-replica": "false", + "block-size": 8192, + "xlog-block-size": 8192, + "checksum-version": 1, + "program-version": "2.1.5", + "server-version": "10", + "current-tli": 1, + "parent-tli": 0, + "start-lsn": "0/2000028", + "stop-lsn": "0/2000160", + "start-time": "2019-09-13 21:37:24+03", + "end-time": "2019-09-13 21:37:29+03", + "recovery-xid": 0, + "recovery-time": "2019-09-13 21:37:28+03", + "data-bytes": 24871902, + "wal-bytes": 16777216, + "primary_conninfo": "user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any", + "status": "OK" + } + ] + } + ] + } +] +``` + +Most fields are consistent with plain format, with some exceptions: + +- size is in bytes. +- 'closest-backup-id' attribute contain ID of valid backup closest to the timeline, located on some of the previous timelines. This backup is the closest starting point to reach the timeline from other timelines by PITR. Closest backup always has a valid status, either OK or DONE. If such backup do not exists, then string is empty. +- DEGRADED timelines contain 'lost-segments' array with information about intervals of missing segments. In OK timelines 'lost-segments' array is empty. +- 'N backups' attribute is replaced with 'backups' array containing backups belonging to the timeline. If timeline has no backups, then 'backups' array is empty. + +### Configuring Retention Policy + +With pg_probackup, you can set retention policies for backups and WAL archive. All policies can be combined together in any way. + +#### Backup Retention Policy + +By default, all backup copies created with pg_probackup are stored in the specified backup catalog. To save disk space, you can configure retention policy and periodically clean up redundant backup copies accordingly. + +To configure retention policy, set one or more of the following variables in the pg_probackup.conf file via [set-config](#set-config): + + --retention-redundancy=redundancy +Specifies **the number of full backup copies** to keep in the backup catalog. 
+ + --retention-window=window +Defines the earliest point in time for which pg_probackup can complete the recovery. This option is set in **the number of days** from the current moment. For example, if `retention-window=7`, pg_probackup must delete all backup copies that are older than seven days, with all the corresponding WAL files. + +If both `--retention-redundancy` and `--retention-window` options are set, pg_probackup keeps backup copies that satisfy at least one condition. For example, if you set `--retention-redundancy=2` and `--retention-window=7`, pg_probackup purges the backup catalog to keep only two full backup copies and all backups that are newer than seven days: + + pg_probackup set-config -B backup_dir --instance instance_name --retention-redundancy=2 --retention-window=7 + +To clean up the backup catalog in accordance with retention policy, run: + + pg_probackup delete -B backup_dir --instance instance_name --delete-expired + +pg_probackup deletes all backup copies that do not conform to the defined retention policy. + +If you would like to also remove the WAL files that are no longer required for any of the backups, add the `--delete-wal` flag: + + pg_probackup delete -B backup_dir --instance instance_name --delete-expired --delete-wal + +>NOTE: Alternatively, you can use the `--delete-expired`, `--merge-expired`, `--delete-wal` flags and the `--retention-window` and `--retention-redundancy` options together with the [backup](#backup) command to remove and merge the outdated backup copies once the new backup is created. + +You can set or override the current retention policy by specifying `--retention-redundancy` and `--retention-window` options directly when running `delete` or `backup` commands: + + pg_probackup delete -B backup_dir --instance instance_name --delete-expired --retention-window=7 --retention-redundancy=2 + +Since incremental backups require that their parent full backup and all the preceding incremental backups are available, if any of such backups expire, they still cannot be removed while at least one incremental backup in this chain satisfies the retention policy. To avoid keeping expired backups that are still required to restore an active incremental one, you can merge them with this backup using the `--merge-expired` flag when running [backup](#backup) or [delete](#delete) commands. 
+ +Suppose you have backed up the *node* instance in the *backup_dir* directory, with the `--retention-window` option is set to *7*, and you have the following backups available on April 10, 2019: + +``` +BACKUP INSTANCE 'node' +=================================================================================================================================== + Instance Version ID Recovery time Mode WAL TLI Time Data WAL Zratio Start LSN Stop LSN Status +=================================================================================================================================== + node 10 P7XDHR 2019-04-10 05:27:15+03 FULL STREAM 1/0 11s 200MB 16MB 1.0 0/18000059 0/18000197 OK + node 10 P7XDQV 2019-04-08 05:32:59+03 PAGE STREAM 1/0 11s 19MB 16MB 1.0 0/15000060 0/15000198 OK + node 10 P7XDJA 2019-04-03 05:28:36+03 DELTA STREAM 1/0 21s 32MB 16MB 1.0 0/13000028 0/13000198 OK + -------------------------------------------------------retention window-------------------------------------------------------- + node 10 P7XDHU 2019-04-02 05:27:59+03 PAGE STREAM 1/0 31s 33MB 16MB 1.0 0/11000028 0/110001D0 OK + node 10 P7XDHB 2019-04-01 05:27:15+03 FULL STREAM 1/0 11s 200MB 16MB 1.0 0/F000028 0/F000198 OK + node 10 P7XDFT 2019-03-29 05:26:25+03 FULL STREAM 1/0 11s 200MB 16MB 1.0 0/D000028 0/D000198 OK +``` + +Even though P7XDHB and P7XDHU backups are outside the retention window, they cannot be removed as it invalidates the succeeding incremental backups P7XDJA and P7XDQV that are still required, so, if you run the [delete](#delete) command with the `--delete-expired` flag, only the P7XDFT full backup will be removed. + +With the `--merge-expired` option, the P7XDJA backup is merged with the underlying P7XDHU and P7XDHB backups and becomes a full one, so there is no need to keep these expired backups anymore: + + pg_probackup delete -B backup_dir --instance node --delete-expired --merge-expired + pg_probackup show -B backup_dir + +``` +BACKUP INSTANCE 'node' +================================================================================================================================== + Instance Version ID Recovery time Mode WAL TLI Time Data WAL Zratio Start LSN Stop LSN Status +================================================================================================================================== + node 10 P7XDHR 2019-04-10 05:27:15+03 FULL STREAM 1/0 11s 200MB 16MB 1.0 0/18000059 0/18000197 OK + node 10 P7XDQV 2019-04-08 05:32:59+03 PAGE STREAM 1/0 11s 19MB 16MB 1.0 0/15000060 0/15000198 OK + node 10 P7XDJA 2019-04-03 05:28:36+03 FULL STREAM 1/0 21s 32MB 16MB 1.0 0/13000028 0/13000198 OK +``` + +>NOTE: The Time field for the merged backup displays the time required for the merge. + +#### Backup Pinning + +If you have the necessity to exclude certain backups from established retention policy then it is possible to pin a backup for an arbitrary amount of time. Example: + + pg_probackup set-backup -B backup_dir --instance instance_name -i backup_id --ttl=30d + +This command will set `expire-time` of the specified backup to 30 days starting from backup `recovery-time` attribute. Basically `expire-time` = `recovery-time` + `ttl`. + +Also you can set `expire-time` explicitly using `--expire-time` option. 
Example: + + pg_probackup set-backup -B backup_dir --instance instance_name -i backup_id --expire-time='2020-01-01 00:00:00+03' + +Alternatively you can use the `--ttl` and `--expire-time` options with the [backup](#backup) command to pin newly created backup: + + pg_probackup backup -B backup_dir --instance instance_name -b FULL --ttl=30d + pg_probackup backup -B backup_dir --instance instance_name -b FULL --expire-time='2020-01-01 00:00:00+03' + +You can determine the fact that backup is pinned and check due expire time by looking up `expire-time` attribute in backup metadata via [show](#show) command: + + pg_probackup show --instance instance_name -i backup_id + +Pinned backup has `expire-time` attribute: +``` +... +recovery-time = '2017-05-16 12:57:31' +expire-time = '2020-01-01 00:00:00+03' +data-bytes = 22288792 +... +``` + +You can unpin the backup by setting the `--ttl` option to zero using [set-backup](#set-backup) command. Example: + + pg_probackup set-backup -B backup_dir --instance instance_name -i backup_id --ttl=0 + +Only pinned backups have the `expire-time` attribute in the backup metadata. + +>NOTE: Pinned incremental backup will also implicitly pin all its parent backups. + +#### WAL Archive Retention Policy + +By default, pg_probackup treatment of WAL Archive is very conservative and only "redundant" WAL segments can be purged, i.e. segments that cannot be applied to any of the existing backups in the backup catalog. To save disk space, you can configure WAL Archive retention policy, that allows to keep WAL of limited depth measured in backups per timeline. + +Suppose you have backed up the *node* instance in the *backup_dir* directory with configured [WAL archiving](#setting-up-continuous-wal-archiving): + + pg_probackup show -B backup_dir --instance node + +``` +BACKUP INSTANCE 'node' +==================================================================================================================================== + Instance Version ID Recovery Time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status +==================================================================================================================================== + node 11 PZ9442 2019-10-12 10:43:21+03 DELTA STREAM 1/0 10s 121kB 16MB 1.00 0/46000028 0/46000160 OK + node 11 PZ943L 2019-10-12 10:43:04+03 FULL STREAM 1/0 10s 180MB 32MB 1.00 0/44000028 0/44000160 OK + node 11 PZ7YR5 2019-10-11 19:49:56+03 DELTA STREAM 1/1 10s 112kB 32MB 1.00 0/41000028 0/41000160 OK + node 11 PZ7YMP 2019-10-11 19:47:16+03 DELTA STREAM 1/1 10s 376kB 32MB 1.00 0/3E000028 0/3F0000B8 OK + node 11 PZ7YK2 2019-10-11 19:45:45+03 FULL STREAM 1/0 11s 180MB 16MB 1.00 0/3C000028 0/3C000198 OK + node 11 PZ7YFO 2019-10-11 19:43:04+03 FULL STREAM 1/0 10s 30MB 16MB 1.00 0/2000028 0/200ADD8 OK +``` + +The state of WAL archive can be determined by using [show](#command) command with `--archive` flag: + + pg_probackup show -B backup_dir --instance node --archive + +``` +ARCHIVE INSTANCE 'node' +=============================================================================================================== + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status +=============================================================================================================== + 1 0 0/0 0000000000000001 0000000000000047 71 36MB 31.00 6 OK +``` + +General WAL purge without `wal-depth` cannot achieve much, only one segment can be removed: + + pg_probackup delete -B backup_dir --instance node 
--delete-wal + +``` +ARCHIVE INSTANCE 'node' +=============================================================================================================== + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status +=============================================================================================================== + 1 0 0/0 0000000000000002 0000000000000047 70 34MB 32.00 6 OK +``` + +If you would like, for example, to keep only those WAL segments that can be applied to the last valid backup, use the `--wal-depth` option: + + pg_probackup delete -B backup_dir --instance node --delete-wal --wal-depth=1 + +``` +ARCHIVE INSTANCE 'node' +================================================================================================================ + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status +================================================================================================================ + 1 0 0/0 0000000000000046 0000000000000047 2 143kB 228.00 6 OK +``` + +Alternatively you can use the `--wal-depth` option with the [backup](#backup) command: + + pg_probackup backup -B backup_dir --instance node -b DELTA --wal-depth=1 --delete-wal + +``` +ARCHIVE INSTANCE 'node' +=============================================================================================================== + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status +=============================================================================================================== + 1 0 0/0 0000000000000048 0000000000000049 1 72kB 228.00 7 OK +``` + +### Merging Backups + +As you take more and more incremental backups, the total size of the backup catalog can substantially grow. To save disk space, you can merge incremental backups to their parent full backup by running the merge command, specifying the backup ID of the most recent incremental backup you would like to merge: + + pg_probackup merge -B backup_dir --instance instance_name -i backup_id + +This command merges the specified incremental backup to its parent full backup, together with all incremental backups between them. Once the merge is complete, the incremental backups are removed as redundant. Thus, the merge operation is virtually equivalent to retaking a full backup and removing all the outdated backups, but it allows to save much time, especially for large data volumes, I/O and network traffic in case of [remote](#using-pg_probackup-in-the-remote-mode) backup. + +Before the merge, pg_probackup validates all the affected backups to ensure that they are valid. You can check the current backup status by running the [show](#show) command with the backup ID: + + pg_probackup show -B backup_dir --instance instance_name -i backup_id + +If the merge is still in progress, the backup status is displayed as MERGING. The merge is idempotent, so you can restart the merge if it was interrupted. + +### Deleting Backups + +To delete a backup that is no longer required, run the following command: + + pg_probackup delete -B backup_dir --instance instance_name -i backup_id + +This command will delete the backup with the specified *backup_id*, together with all the incremental backups that descend from *backup_id* if any. This way you can delete some recent incremental backups, retaining the underlying full backup and some of the incremental backups that follow it. 
+
+To delete obsolete WAL files that are not necessary to restore any of the remaining backups, use the `--delete-wal` flag:
+
+    pg_probackup delete -B backup_dir --instance instance_name --delete-wal
+
+To delete backups that are expired according to the current retention policy, use the `--delete-expired` flag:
+
+    pg_probackup delete -B backup_dir --instance instance_name --delete-expired
+
+Note that expired backups cannot be removed while at least one incremental backup that satisfies the retention policy is based on them. If you would like to minimize the number of backups still required to keep incremental backups valid, specify the `--merge-expired` flag when running this command:
+
+    pg_probackup delete -B backup_dir --instance instance_name --delete-expired --merge-expired
+
+In this case, pg_probackup searches for the oldest incremental backup that satisfies the retention policy and merges this backup with the underlying full and incremental backups that have already expired, thus making it a full backup. Once the merge is complete, the remaining expired backups are deleted.
+
+Before merging or deleting backups, you can run the `delete` command with the `--dry-run` flag, which displays the status of all the available backups according to the current retention policy, without performing any irreversible actions.
+
+## Command-Line Reference
+### Commands
+
+This section describes pg_probackup commands. Some commands require mandatory parameters and can take additional options. Optional parameters are enclosed in square brackets. For detailed descriptions of options, see the section [Options](#options).
+
+#### version
+
+    pg_probackup version
+
+Prints the pg_probackup version.
+
+#### help
+
+    pg_probackup help [command]
+
+Displays the synopsis of pg_probackup commands. If one of the pg_probackup commands is specified, shows detailed information about the options that can be used with this command.
+
+#### init
+
+    pg_probackup init -B backup_dir [--help]
+
+Initializes the backup catalog in *backup_dir* that will store backup copies, WAL archive and meta information for the backed up database clusters. If the specified *backup_dir* already exists, it must be empty. Otherwise, pg_probackup displays a corresponding error message.
+
+For details, see the section [Initializing the Backup Catalog](#initializing-the-backup-catalog).
+
+#### add-instance
+
+    pg_probackup add-instance -B backup_dir -D data_dir --instance instance_name
+    [--help]
+
+Initializes a new backup instance inside the backup catalog *backup_dir* and generates the pg_probackup.conf configuration file that controls pg_probackup settings for the cluster with the specified *data_dir* data directory.
+
+For details, see the section [Adding a New Backup Instance](#adding-a-new-backup-instance).
+
+#### del-instance
+
+    pg_probackup del-instance -B backup_dir --instance instance_name
+    [--help]
+
+Deletes all backups and WAL files associated with the specified instance.
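+
+For example, a sketch of removing a retired instance from the catalog (the instance name `node` here is only an illustration):
+
+    pg_probackup del-instance -B backup_dir --instance node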
+ +#### set-config + + pg_probackup set-config -B backup_dir --instance instance_name + [--help] [--pgdata=pgdata-path] + [--retention-redundancy=redundancy][--retention-window=window][--wal-depth=wal_depth] + [--compress-algorithm=compression_algorithm] [--compress-level=compression_level] + [-d dbname] [-h host] [-p port] [-U username] + [--archive-timeout=timeout] [--external-dirs=external_directory_path] + [--restore-command=cmdline] + [remote_options] [remote_archive_options] [logging_options] + +Adds the specified connection, compression, retention, logging and external directory settings into the pg_probackup.conf configuration file, or modifies the previously defined values. + +For all available settings, see the [Options](#options) section. + +It is **not recommended** to edit pg_probackup.conf manually. + +#### set-backup + + pg_probackup set-backup -B backup_dir --instance instance_name -i backup_id + {--ttl=ttl | --expire-time=time} [--help] + +Sets the provided backup-specific settings into the backup.control configuration file, or modifies previously defined values. + +For all available settings, see the section [Pinning Options](#pinning-options). + +#### show-config + + pg_probackup show-config -B backup_dir --instance instance_name [--format=plain|json] + +Displays the contents of the pg_probackup.conf configuration file located in the '*backup_dir*/backups/*instance_name*' directory. You can specify the `--format=json` option to return the result in the JSON format. By default, configuration settings are shown as plain text. + +To edit pg_probackup.conf, use the [set-config](#set-config) command. + +#### show + + pg_probackup show -B backup_dir + [--help] [--instance instance_name [-i backup_id | --archive]] [--format=plain|json] + +Shows the contents of the backup catalog. If *instance_name* and *backup_id* are specified, shows detailed information about this backup. You can specify the `--format=json` option to return the result in the JSON format. If `--archive` option is specified, shows the content of WAL archive of the backup catalog. + +By default, the contents of the backup catalog is shown as plain text. + +For details on usage, see the sections [Managing the Backup Catalog](#managing-the-backup-catalog) and [Viewing WAL Archive Information](#viewing-wal-archive-information). + + +#### backup + + pg_probackup backup -B backup_dir -b backup_mode --instance instance_name + [--help] [-j num_threads] [--progress] + [-C] [--stream [-S slot_name] [--temp-slot]] [--backup-pg-log] + [--no-validate] [--skip-block-validation] + [-w --no-password] [-W --password] + [--archive-timeout=timeout] [--external-dirs=external_directory_path] + [connection_options] [compression_options] [remote_options] + [retention_options] [pinning_options] [logging_options] + +Creates a backup copy of the PostgreSQL instance. The *backup_mode* option specifies the backup mode to use. + + -b mode + --backup-mode=mode + +Specifies the backup mode to use. Possible values are: + +- FULL — creates a full backup that contains all the data files of the cluster to be restored. +- DELTA — reads all data files in the data directory and creates an incremental backup for pages that have changed since the previous backup. +- PAGE — creates an incremental PAGE backup based on the WAL files that have changed since the previous full or incremental backup was taken. +- PTRACK — creates an incremental PTRACK backup tracking page changes on the fly. 
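+
+For illustration only (the instance name `node` is an assumption, not part of the reference), the corresponding invocations of these modes look like this:
+
+    pg_probackup backup -B backup_dir --instance node -b FULL
+    pg_probackup backup -B backup_dir --instance node -b DELTA
+    pg_probackup backup -B backup_dir --instance node -b PAGE
+    pg_probackup backup -B backup_dir --instance node -b PTRACK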
+
+```
+-C
+--smooth-checkpoint
+```
+Spreads out the checkpoint over a period of time. By default, pg_probackup tries to complete the checkpoint as soon as possible.
+
+    --stream
+Makes a [STREAM](#stream-mode) backup that includes all the necessary WAL files by streaming them from the database server via the replication protocol.
+
+    --temp-slot
+Creates a temporary physical replication slot for streaming WAL from the backed up PostgreSQL instance. It ensures that all the required WAL segments remain available if WAL is rotated while the backup is in progress. This flag can only be used together with the `--stream` flag. The default slot name is `pg_probackup_slot`, which can be changed via the `--slot/-S` option.
+
+    -S slot_name
+    --slot=slot_name
+Specifies the replication slot for WAL streaming. This option can only be used together with the `--stream` flag.
+
+    --backup-pg-log
+Includes the log directory into the backup. This directory usually contains log messages. By default, the log directory is excluded.
+
+    -E external_directory_path
+    --external-dirs=external_directory_path
+Includes the specified directory into the backup. This option is useful to back up scripts, SQL dumps and configuration files located outside of the data directory. If you would like to back up several external directories, separate their paths by a colon on Unix and a semicolon on Windows.
+
+    --archive-timeout=wait_time
+Sets the timeout for WAL segment archiving and streaming, in seconds. By default, pg_probackup waits 300 seconds.
+
+    --skip-block-validation
+Disables block-level checksum verification to speed up backup.
+
+    --no-validate
+Skips automatic validation after a successful backup. You can use this flag if you validate backups regularly and would like to save time when running backup operations.
+
+Additionally [Connection Options](#connection-options), [Retention Options](#retention-options), [Pinning Options](#pinning-options), [Remote Mode Options](#remote-mode-options), [Compression Options](#compression-options), [Logging Options](#logging-options) and [Common Options](#common-options) can be used.
+
+For details on usage, see the section [Creating a Backup](#creating-a-backup).
+
+#### restore
+
+    pg_probackup restore -B backup_dir --instance instance_name
+    [--help] [-D data_dir] [-i backup_id]
+    [-j num_threads] [--progress]
+    [-T OLDDIR=NEWDIR] [--external-mapping=OLDDIR=NEWDIR] [--skip-external-dirs]
+    [-R | --restore-as-replica] [--no-validate] [--skip-block-validation] [--force]
+    [--restore-command=cmdline]
+    [recovery_options] [logging_options] [remote_options]
+    [partial_restore_options] [remote_archive_options]
+
+Restores the PostgreSQL instance from a backup copy located in the *backup_dir* backup catalog. If you specify a [recovery target option](#recovery-target-options), pg_probackup finds the closest backup and restores it to the specified recovery target. Otherwise, the most recent backup is used.
+
+    -R | --restore-as-replica
+Writes a minimal recovery.conf in the output directory to facilitate setting up a standby server. The password is not included. If the replication connection requires a password, you must specify the password manually.
+
+    -T OLDDIR=NEWDIR
+    --tablespace-mapping=OLDDIR=NEWDIR
+
+Relocates the tablespace from the OLDDIR to the NEWDIR directory at the time of recovery. Both OLDDIR and NEWDIR must be absolute paths. If the path contains the equals sign (=), escape it with a backslash. This option can be specified multiple times for multiple tablespaces.
+
+    --external-mapping=OLDDIR=NEWDIR
+Relocates an external directory included in the backup from the OLDDIR to the NEWDIR directory at the time of recovery. Both OLDDIR and NEWDIR must be absolute paths. If the path contains the equals sign (=), escape it with a backslash. This option can be specified multiple times for multiple directories.
+
+    --skip-external-dirs
+Skips external directories included in the backup with the `--external-dirs` option. The contents of these directories will not be restored.
+
+    --skip-block-validation
+Disables block-level checksum verification to speed up validation. During automatic validation before restore, only file-level checksums are verified.
+
+    --no-validate
+Skips backup validation. You can use this flag if you validate backups regularly and would like to save time when running restore operations.
+
+    --restore-command=cmdline
+Sets the [restore_command](https://www.postgresql.org/docs/current/archive-recovery-settings.html#RESTORE-COMMAND) parameter to the specified command. Example: `--restore-command='cp /mnt/server/archivedir/%f "%p"'`
+
+    --force
+Allows you to ignore the invalid status of the backup. You can use this flag if, for some reason, you need to restore the PostgreSQL cluster from a corrupt or invalid backup. Use with caution.
+
+Additionally [Recovery Target Options](#recovery-target-options), [Remote Mode Options](#remote-mode-options), [Remote WAL Archive Options](#remote-wal-archive-options), [Logging Options](#logging-options), [Partial Restore](#partial-restore) and [Common Options](#common-options) can be used.
+
+For details on usage, see the section [Restoring a Cluster](#restoring-a-cluster).
+
+#### checkdb
+
+    pg_probackup checkdb
+    [-B backup_dir] [--instance instance_name] [-D data_dir]
+    [--help] [-j num_threads] [--progress]
+    [--skip-block-validation] [--amcheck] [--heapallindexed]
+    [connection_options] [logging_options]
+
+Verifies the correctness of the PostgreSQL database cluster by detecting physical and logical corruption.
+
+    --amcheck
+Performs logical verification of indexes for the specified PostgreSQL instance if no corruption was found while checking data files. You must have the `amcheck` extension or the `amcheck_next` extension installed in the database to check its indexes. For databases without amcheck, index verification will be skipped.
+
+    --skip-block-validation
+Skips validation of data files. Can be used only with the `--amcheck` flag, so only logical verification of indexes is performed.
+
+    --heapallindexed
+Checks that all heap tuples that should be indexed are actually indexed. You can use this flag only together with the `--amcheck` flag. Can be used only with the `amcheck` extension of version 2.0 and the `amcheck_next` extension of any version.
+
+Additionally [Connection Options](#connection-options) and [Logging Options](#logging-options) can be used.
+
+For details on usage, see the section [Verifying a Cluster](#verifying-a-cluster).
+
+#### validate
+
+    pg_probackup validate -B backup_dir
+    [--help] [--instance instance_name] [-i backup_id]
+    [-j num_threads] [--progress]
+    [--skip-block-validation]
+    [recovery_target_options] [logging_options]
+
+Verifies that all the files required to restore the cluster are present and not corrupted. If *instance_name* is not specified, pg_probackup validates all backups available in the backup catalog.
If you specify the *instance_name* without any additional options, pg_probackup validates all the backups available for this backup instance. If you specify the *instance_name* with [recovery target options](#recovery-target-options) and/or a *backup_id*, pg_probackup checks whether it is possible to restore the cluster using these options.
+
+For details, see the section [Validating a Backup](#validating-a-backup).
+
+#### merge
+
+    pg_probackup merge -B backup_dir --instance instance_name -i backup_id
+    [--help] [-j num_threads] [--progress]
+    [logging_options]
+
+Merges the specified incremental backup to its parent full backup, together with all incremental backups between them, if any. As a result, the full backup takes in all the merged data, and the incremental backups are removed as redundant.
+
+For details, see the section [Merging Backups](#merging-backups).
+
+#### delete
+
+    pg_probackup delete -B backup_dir --instance instance_name
+    [--help] [-j num_threads] [--progress]
+    [--retention-redundancy=redundancy][--retention-window=window][--wal-depth=wal_depth]
+    [--delete-wal] {-i backup_id | --delete-expired [--merge-expired] | --merge-expired}
+    [--dry-run]
+    [logging_options]
+
+Deletes the backup with the specified *backup_id* or launches the retention purge of backups and archived WAL that do not satisfy the current retention policies.
+
+For details, see the sections [Deleting Backups](#deleting-backups), [Retention Options](#retention-options) and [Configuring Retention Policy](#configuring-retention-policy).
+
+#### archive-push
+
+    pg_probackup archive-push -B backup_dir --instance instance_name
+    --wal-file-path=wal_file_path --wal-file-name=wal_file_name
+    [--help] [--compress] [--compress-algorithm=compression_algorithm]
+    [--compress-level=compression_level] [--overwrite]
+    [remote_options] [logging_options]
+
+Copies WAL files into the corresponding subdirectory of the backup catalog and validates the backup instance by *instance_name* and *system-identifier*. If the parameters of the backup instance and the cluster do not match, this command fails with the following error message: “Refuse to push WAL segment segment_name into archive. Instance parameters mismatch.” For each WAL file moved to the backup catalog, you will see the following message in the PostgreSQL log file: “pg_probackup archive-push completed successfully”.
+
+If the files to be copied already exist in the backup catalog, pg_probackup computes and compares their checksums. If the checksums match, archive-push skips the corresponding file and returns a successful execution code. Otherwise, archive-push fails with an error. If you would like to replace WAL files in the case of a checksum mismatch, run the archive-push command with the `--overwrite` flag.
+
+Copying is done to a temporary file with the `.part` suffix or, if [compression](#compression-options) is used, the `.gz.part` suffix. Once the copy is complete, an atomic rename is performed. This algorithm ensures that a failed archive-push does not stall continuous archiving and that concurrent archiving from multiple sources into a single WAL archive carries no risk of archive corruption.
+WAL segments copied to the archive are synced to disk.
+
+You can use `archive-push` in the [archive_command](https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-COMMAND) PostgreSQL parameter to set up [continuous WAL archiving](#setting-up-continuous-wal-archiving).
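+
+As a sketch (the catalog path and instance name below are placeholders, not defaults), the corresponding postgresql.conf entries could look like this:
+
+```
+# postgresql.conf (illustrative values only)
+archive_mode = on
+archive_command = 'pg_probackup archive-push -B /mnt/backups --instance node --wal-file-path=%p --wal-file-name=%f'
+```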
+ +For details, see sections [Archiving Options](#archiving-options) and [Compression Options](#compression-options). + +#### archive-get + + pg_probackup archive-get -B backup_dir --instance instance_name --wal-file-path=wal_file_path --wal-file-name=wal_file_name + [--help] [remote_options] [logging_options] + +Copies WAL files from the corresponding subdirectory of the backup catalog to the cluster's write-ahead log location. This command is automatically set by pg_probackup as part of the `restore_command` in 'recovery.conf' when restoring backups using a WAL archive. You do not need to set it manually. + +### Options + +This section describes command-line options for pg_probackup commands. If the option value can be derived from an environment variable, this variable is specified below the command-line option, in the uppercase. Some values can be taken from the pg_probackup.conf configuration file located in the backup catalog. + +For details, see the section [Configuring pg_probackup](#configuring-pg_probackup). + +If an option is specified using more than one method, command-line input has the highest priority, while the pg_probackup.conf settings have the lowest priority. + +#### Common Options +The list of general options. + + -B directory + --backup-path=directory + BACKUP_PATH +Specifies the absolute path to the backup catalog. Backup catalog is a directory where all backup files and meta information are stored. Since this option is required for most of the pg_probackup commands, you are recommended to specify it once in the BACKUP_PATH environment variable. In this case, you do not need to use this option each time on the command line. + + -D directory + --pgdata=directory + PGDATA +Specifies the absolute path to the data directory of the database cluster. This option is mandatory only for the [add-instance](#add-instance) command. Other commands can take its value from the PGDATA environment variable, or from the pg_probackup.conf configuration file. + + -i backup_id + -backup-id=backup_id +Specifies the unique identifier of the backup. + + -j num_threads + --threads=num_threads +Sets the number of parallel threads for backup, restore, merge, validation and verification processes. + + --progress +Shows the progress of operations. + + --help +Shows detailed information about the options that can be used with this command. + +#### Recovery Target Options + +If [continuous WAL archiving](#setting-up-continuous-wal-archiving) is configured, you can use one of these options together with [restore](#restore) or [validate](#validate) commands to specify the moment up to which the database cluster must be restored or validated. + + --recovery-target=immediate|latest +Defines when to stop the recovery: + +- `immediate` value stops the recovery after reaching the consistent state of the specified backup, or the latest available backup if the `-i/--backup_id` option is omitted. +- `latest` value continues the recovery until all WAL segments available in the archive are applied. + +Default value of `--recovery-target` depends on WAL delivery method of restored backup, `immediate` for STREAM backup and `latest` for ARCHIVE. + + --recovery-target-timeline=timeline +Specifies a particular timeline to which recovery will proceed. By default, the timeline of the specified backup is used. + + --recovery-target-lsn=lsn +Specifies the LSN of the write-ahead log location up to which recovery will proceed. Can be used only when restoring database cluster of major version 10 or higher. 
+
+    --recovery-target-name=recovery_target_name
+Specifies a named savepoint up to which to restore the cluster data.
+
+    --recovery-target-time=time
+Specifies the timestamp up to which recovery will proceed.
+
+    --recovery-target-xid=xid
+Specifies the transaction ID up to which recovery will proceed.
+
+    --recovery-target-inclusive=boolean
+Specifies whether to stop just after the specified recovery target (true), or just before the recovery target (false). This option can only be used together with the `--recovery-target-name`, `--recovery-target-time`, `--recovery-target-lsn` or `--recovery-target-xid` options. The default depends on the [recovery_target_inclusive](https://www.postgresql.org/docs/current/recovery-target-settings.html#RECOVERY-TARGET-INCLUSIVE) parameter.
+
+    --recovery-target-action=pause|promote|shutdown
+    Default: pause
+Specifies [the action](https://www.postgresql.org/docs/current/recovery-target-settings.html#RECOVERY-TARGET-ACTION) the server should take when the recovery target is reached.
+
+#### Retention Options
+
+You can use these options together with the [backup](#backup) and [delete](#delete) commands.
+
+For details on configuring retention policy, see the section [Configuring Retention Policy](#configuring-retention-policy).
+
+    --retention-redundancy=redundancy
+    Default: 0
+Specifies the number of full backup copies to keep in the backup catalog. Must be a positive integer. The zero value disables this setting.
+
+    --retention-window=window
+    Default: 0
+Number of days of recoverability. Must be a positive integer. The zero value disables this setting.
+
+    --wal-depth=wal_depth
+    Default: 0
+Number of latest valid backups on every timeline that must retain the ability to perform PITR. Must be a positive integer. The zero value disables this setting.
+
+    --delete-wal
+Deletes WAL files that are no longer required to restore the cluster from any of the existing backups.
+
+    --delete-expired
+Deletes backups that do not conform to the retention policy defined in the pg_probackup.conf configuration file.
+
+    --merge-expired
+Merges the oldest incremental backup that satisfies the requirements of the retention policy with its parent backups that have already expired.
+
+    --dry-run
+Displays the current status of all the available backups, without deleting or merging expired backups, if any.
+
+##### Pinning Options
+
+You can use these options together with the [backup](#backup) and [set-backup](#set-backup) commands.
+
+For details on backup pinning, see the section [Backup Pinning](#backup-pinning).
+
+    --ttl=ttl
+Specifies the amount of time the backup should be pinned. Must be a positive integer. The zero value unpins an already pinned backup. Supported units: ms, s, min, h, d (s by default). Example: `--ttl=30d`.
+
+    --expire-time=time
+Specifies the timestamp up to which the backup will stay pinned. Must be an ISO-8601 compliant timestamp. Example: `--expire-time='2020-01-01 00:00:00+03'`
+
+#### Logging Options
+
+You can use these options with any command.
+
+    --log-level-console=log_level
+    Default: info
+Controls which message levels are sent to the console log. Valid values are `verbose`, `log`, `info`, `warning`, `error` and `off`. Each level includes all the levels that follow it. The later the level, the fewer messages are sent. The `off` level disables console logging.
+ +>NOTE: all console log messages are going to stderr, so output from [show](#show) and [show-config](#show-config) commands do not mingle with log messages. + + --log-level-file=log_level + Default: off +Controls which message levels are sent to a log file. Valid values are `verbose`, `log`, `info`, `warning`, `error` and `off`. Each level includes all the levels that follow it. The later the level, the fewer messages are sent. The `off` level disables file logging. + + --log-filename=log_filename + Default: pg_probackup.log +Defines the filenames of the created log files. The filenames are treated as a strftime pattern, so you can use %-escapes to specify time-varying filenames. + +For example, if you specify the 'pg_probackup-%u.log' pattern, pg_probackup generates a separate log file for each day of the week, with %u replaced by the corresponding decimal number: pg_probackup-1.log for Monday, pg_probackup-2.log for Tuesday, and so on. + +This option takes effect if file logging is enabled by the `log-level-file` option. + + --error-log-filename=error_log_filename + Default: none +Defines the filenames of log files for error messages only. The filenames are treated as a strftime pattern, so you can use %-escapes to specify time-varying filenames. + +For example, if you specify the 'error-pg_probackup-%u.log' pattern, pg_probackup generates a separate log file for each day of the week, with %u replaced by the corresponding decimal number: error-pg_probackup-1.log for Monday, error-pg_probackup-2.log for Tuesday, and so on. + +This option is useful for troubleshooting and monitoring. + + --log-directory=log_directory + Default: $BACKUP_PATH/log/ +Defines the directory in which log files will be created. You must specify the absolute path. This directory is created lazily, when the first log message is written. + + --log-rotation-size=log_rotation_size + Default: 0 +Maximum size of an individual log file. If this value is reached, the log file is rotated once a pg_probackup command is launched, except help and version commands. The zero value disables size-based rotation. Supported units: kB, MB, GB, TB (kB by default). + + --log-rotation-age=log_rotation_age + Default: 0 +Maximum lifetime of an individual log file. If this value is reached, the log file is rotated once a pg_probackup command is launched, except help and version commands. The time of the last log file creation is stored in $BACKUP_PATH/log/log_rotation. The zero value disables time-based rotation. Supported units: ms, s, min, h, d (min by default). + +#### Connection Options + +You can use these options together with [backup](#backup) and [checkdb](#checkdb) commands. + +All [libpq environment variables](https://www.postgresql.org/docs/current/libpq-envars.html) are supported. + + -d dbname + --pgdatabase=dbname + PGDATABASE +Specifies the name of the database to connect to. The connection is used only for managing backup process, so you can connect to any existing database. If this option is not provided on the command line, PGDATABASE environment variable, or the pg_probackup.conf configuration file, pg_probackup tries to take this value from the PGUSER environment variable, or from the current user name if PGUSER variable is not set. + + -h host + --pghost=host + PGHOST + Default: local socket +Specifies the host name of the system on which the server is running. If the value begins with a slash, it is used as a directory for the Unix domain socket. 
+ + -p port + --pgport=port + PGPORT + Default: 5432 +Specifies the TCP port or the local Unix domain socket file extension on which the server is listening for connections. + + -U username + --pguser=username + PGUSER +User name to connect as. + + -w + --no-password + Disables a password prompt. If the server requires password authentication and a password is not available by other means such as a [.pgpass](https://www.postgresql.org/docs/current/libpq-pgpass.html) file or PGPASSWORD environment variable, the connection attempt will fail. This flag can be useful in batch jobs and scripts where no user is present to enter a password. + + -W + --password +Forces a password prompt. + +#### Compression Options + +You can use these options together with [backup](#backup) and [archive-push](#archive-push) commands. + + --compress-algorithm=compression_algorithm + Default: none +Defines the algorithm to use for compressing data files. Possible values are `zlib`, `pglz`, and `none`. If set to zlib or pglz, this option enables compression. By default, compression is disabled. +For the [archive-push](#archive-push) command, the pglz compression algorithm is not supported. + + --compress-level=compression_level + Default: 1 +Defines compression level (0 through 9, 0 being no compression and 9 being best compression). This option can be used together with `--compress-algorithm` option. + + --compress +Alias for `--compress-algorithm=zlib` and `--compress-level=1`. + +#### Archiving Options + +These options can be used with [archive-push](#archive-push) command in [archive_command](https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-ARCHIVE-COMMAND) setting and [archive-get](#archive-get) command in [restore_command](https://www.postgresql.org/docs/current/archive-recovery-settings.html#RESTORE-COMMAND) setting. + +Additionally [Remote Mode Options](#remote-mode-options) and [Logging Options](#logging-options) can be used. + + --wal-file-path=wal_file_path +Provides the path to the WAL file in `archive_command` and `restore_command`. The `%p` variable as value for this option is required for correct processing. + + --wal-file-name=wal_file_name +Provides the name of the WAL file in `archive_command` and `restore_command`. The `%f` variable as value is required for correct processing. + + --overwrite +Overwrites archived WAL file. Use this flag together with the [archive-push](#archive-push) command if the specified subdirectory of the backup catalog already contains this WAL file and it needs to be replaced with its newer copy. Otherwise, archive-push reports that a WAL segment already exists, and aborts the operation. If the file to replace has not changed, archive-push skips this file regardless of the `--overwrite` flag. + +#### Remote Mode Options + +This section describes the options related to running pg_probackup operations remotely via SSH. These options can be used with [add-instance](#add-instance), [set-config](#set-config), [backup](#backup), [restore](#restore), [archive-push](#archive-push) and [archive-get](#archive-get) commands. + +For details on configuring and usage of remote operation mode, see the sections [Configuring the Remote Mode](#configuring-the-remote-mode) and [Using pg_probackup in the Remote Mode](#using-pg_probackup-in-the-remote-mode). + + --remote-proto=proto +Specifies the protocol to use for remote operations. Currently only the SSH protocol is supported. 
Possible values are:
+
+- `ssh` enables the remote backup mode via SSH. This is the default value.
+- `none` explicitly disables the remote mode.
+
+You can omit this option if the `--remote-host` option is specified.
+
+    --remote-host=destination
+Specifies the remote host IP address or hostname to connect to.
+
+    --remote-port=port
+        Default: 22
+Specifies the remote host port to connect to.
+
+    --remote-user=username
+        Default: current user
+Specifies the remote host user for the SSH connection. If you omit this option, the current user initiating the SSH connection is used.
+
+    --remote-path=path
+Specifies the pg_probackup installation directory on the remote system.
+
+    --ssh-options=ssh_options
+Specifies a string of SSH command-line options. For example, the following options can be used to set keep-alive for SSH connections opened by pg_probackup: `--ssh-options='-o ServerAliveCountMax=5 -o ServerAliveInterval=60'`. The full list of possible options can be found on the [ssh_config manual page](https://man.openbsd.org/ssh_config.5).
+
+#### Remote WAL Archive Options
+
+This section describes the options used to provide the arguments for the [Remote Mode Options](#remote-mode-options) of the [archive-get](#archive-get) command used in the [restore_command](https://www.postgresql.org/docs/current/archive-recovery-settings.html#RESTORE-COMMAND) setting when restoring an ARCHIVE backup or performing PITR.
+
+    --archive-host=destination
+Provides the argument for the `--remote-host` option in the `archive-get` command.
+
+    --archive-port=port
+        Default: 22
+Provides the argument for the `--remote-port` option in the `archive-get` command.
+
+    --archive-user=username
+        Default: PostgreSQL user
+Provides the argument for the `--remote-user` option in the `archive-get` command. If you omit this option, the user running the PostgreSQL cluster is used.
+
+#### Partial Restore Options
+
+This section describes the options related to partial restore of a cluster from a backup. These options can be used with the [restore](#restore) command.
+
+    --db-exclude=dbname
+Specifies the name of a database to exclude from restore. All other databases in the cluster will be restored as usual, including `template0` and `template1`. This option can be specified multiple times for multiple databases.
+
+    --db-include=dbname
+Specifies the name of a database to restore from the backup. All other databases in the cluster will not be restored, with the exception of `template0` and `template1`. This option can be specified multiple times for multiple databases.
+
+#### Replica Options
+
+This section describes the options related to taking a backup from standby.
+
+>NOTE: Starting from pg_probackup 2.0.24, backups can be taken from standby without connecting to the master server, so these options are no longer required. In lower versions, pg_probackup had to connect to the master to determine recovery time, the earliest moment for which you can restore a consistent state of the database cluster.
+
+    --master-db=dbname
+        Default: postgres, the default PostgreSQL database.
+Deprecated. Specifies the name of the database on the master server to connect to. The connection is used only for managing the backup process, so you can connect to any existing database. Can be set in the pg_probackup.conf using the [set-config](#set-config) command.
+
+    --master-host=host
+Deprecated. Specifies the host name of the system on which the master server is running.
+
+    --master-port=port
+        Default: 5432, the PostgreSQL default port.
+Deprecated. Specifies the TCP port or the local Unix domain socket file extension on which the master server is listening for connections.
+
+    --master-user=username
+        Default: postgres, the PostgreSQL default user name.
+Deprecated. User name to connect as.
+
+    --replica-timeout=timeout
+        Default: 300 sec
+Deprecated. Wait time for WAL segment streaming via replication, in seconds. By default, pg_probackup waits 300 seconds. You can also define this parameter in the pg_probackup.conf configuration file using the [set-config](#set-config) command.
+
+## Howto
+
+All examples below assume the remote mode of operations via `ssh`. If you are planning to run backup and restore operations locally, the `Setup passwordless SSH connection` step can be skipped and all `--remote-*` options can be omitted.
+
+Examples are based on Ubuntu 18.04, PostgreSQL 11 and pg_probackup 2.2.0.
+
+- *backup_host* - host with the backup catalog.
+- *backupman* - user on `backup_host` running all pg_probackup operations.
+- */mnt/backups* - directory on `backup_host` where the backup catalog is stored.
+- *postgres_host* - host with the PostgreSQL cluster.
+- *postgres* - user on `postgres_host` which runs the PostgreSQL cluster.
+- */var/lib/postgresql/11/main* - directory on `postgres_host` where the PGDATA of the PostgreSQL cluster is located.
+- *backupdb* - database used for connection to the PostgreSQL cluster.
+
+### Minimal Setup
+
+This setup relies on autonomous FULL and DELTA backups.
+
+#### Setup passwordless SSH connection from `backup_host` to `postgres_host`
+```
+[backupman@backup_host] ssh-copy-id postgres@postgres_host
+```
+
+#### Setup PostgreSQL cluster
+
+For security reasons, it is recommended to use a separate database for backup operations.
+```
+postgres=#
+CREATE DATABASE backupdb;
+```
+
+Connect to the `backupdb` database, create the role `probackup` and grant it the following permissions:
+```
+backupdb=#
+BEGIN;
+CREATE ROLE probackup WITH LOGIN REPLICATION;
+GRANT USAGE ON SCHEMA pg_catalog TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean, boolean) TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_wal() TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_wal_replay_lsn() TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO probackup;
+GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_checkpoint() TO probackup;
+COMMIT;
+```
+
+#### Init the backup catalog
+```
+[backupman@backup_host]$ pg_probackup-11 init -B /mnt/backups
+INFO: Backup catalog '/mnt/backups' successfully inited
+```
+
+#### Add instance 'pg-11' to backup catalog
+```
+[backupman@backup_host]$ pg_probackup-11 add-instance -B /mnt/backups --instance 'pg-11' --remote-host=postgres_host --remote-user=postgres -D /var/lib/postgresql/11/main
+INFO: Instance 'node' successfully inited
+```
+
+#### Take FULL backup
+```
+[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11' -b FULL --stream --remote-host=postgres_host --remote-user=postgres -U probackup -d backupdb
+INFO: Backup start, pg_probackup version: 2.2.0, instance: node, backup ID: PZ7YK2, backup mode: FULL, wal mode: STREAM, remote: true, compress-algorithm: none, compress-level: 1
+INFO: Start transferring data files
+INFO: Data files are transferred
+INFO: wait for pg_stop_backup()
+INFO: pg_stop backup() successfully executed
+INFO: Validating backup PZ7YK2
+INFO: Backup PZ7YK2 data files are valid
+INFO: Backup PZ7YK2 resident size: 196MB
+INFO: Backup PZ7YK2 completed
+```
+
+#### Let's take a look at the backup catalog
+```
+[backupman@backup_host] pg_probackup-11 show -B /mnt/backups --instance 'pg-11'
+
+BACKUP INSTANCE 'pg-11'
+==================================================================================================================================
+ Instance  Version  ID      Recovery Time           Mode   WAL Mode  TLI  Time  Data   WAL   Zratio  Start LSN   Stop LSN    Status
+==================================================================================================================================
+ node      11       PZ7YK2  2019-10-11 19:45:45+03  FULL   STREAM    1/0  11s   180MB  16MB  1.00    0/3C000028  0/3C000198  OK
+```
+
+#### Take an incremental backup in DELTA mode
+```
+[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11' -b delta --stream --remote-host=postgres_host --remote-user=postgres -U probackup -d backupdb
+INFO: Backup start, pg_probackup version: 2.2.0, instance: node, backup ID: PZ7YMP, backup mode: DELTA, wal mode: STREAM, remote: true, compress-algorithm: none, compress-level: 1
+INFO: Parent backup: PZ7YK2
+INFO: Start transferring data files
+INFO: Data files are transferred
+INFO: wait for pg_stop_backup()
+INFO: pg_stop backup() successfully executed
+INFO: Validating backup PZ7YMP
+INFO: Backup PZ7YMP data files are valid
+INFO: Backup PZ7YMP resident size: 32MB
+INFO: Backup PZ7YMP completed
+```
+
+#### Let's move some parameters into the configuration file, so the command line can be less crowded
+```
+[backupman@backup_host] pg_probackup-11 set-config -B /mnt/backups --instance 'pg-11' --remote-host=postgres_host --remote-user=postgres -U probackup -d backupdb
+```
+
+#### Take another incremental backup in DELTA mode, omitting some of the previous parameters:
+```
+[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11' -b delta --stream
+INFO: Backup start, pg_probackup version: 2.2.0, instance: node, backup ID: PZ7YR5, backup mode: DELTA, wal mode: STREAM, remote: true, compress-algorithm: none, compress-level: 1
+INFO: Parent backup: PZ7YMP
+INFO: Start transferring data files
+INFO: Data files are transferred
+INFO: wait for pg_stop_backup()
+INFO: pg_stop backup() successfully executed
+INFO: Validating backup PZ7YR5
+INFO: Backup PZ7YR5 data files are valid
+INFO: Backup PZ7YR5 resident size: 32MB
+INFO: Backup PZ7YR5 completed
+```
+
+#### Let's take a look at the instance config
+```
+[backupman@backup_host] pg_probackup-11 show-config -B /mnt/backups --instance 'pg-11'
+
+# Backup instance information
+pgdata = /var/lib/postgresql/11/main
+system-identifier = 6746586934060931492
+xlog-seg-size = 16777216
+# Connection parameters
+pgdatabase = backupdb
+pghost = postgres_host
+pguser = probackup
+# Replica parameters
+replica-timeout = 5min
+# Archive parameters
+archive-timeout = 5min
+# Logging parameters
+log-level-console = INFO
+log-level-file = OFF
+log-filename = pg_probackup.log
+log-rotation-size = 0
+log-rotation-age = 0
+# Retention parameters
+retention-redundancy = 0
+retention-window = 0
+wal-depth = 0
+# Compression parameters
+compress-algorithm = none
+compress-level = 1
+# Remote access parameters
+remote-proto = ssh
+remote-host = postgres_host
+```
+
+Note that the options that were not overridden by the set-config command keep their default values.
+
+#### Let's take a look at the backup catalog
+```
+[backupman@backup_host] pg_probackup-11 show -B /mnt/backups --instance 'pg-11'
+
+====================================================================================================================================
+ Instance  Version  ID      Recovery Time           Mode   WAL Mode  TLI  Time  Data   WAL   Zratio  Start LSN   Stop LSN    Status
+====================================================================================================================================
+ node      11       PZ7YR5  2019-10-11 19:49:56+03  DELTA  STREAM    1/1  10s   112kB  32MB  1.00    0/41000028  0/41000160  OK
+ node      11       PZ7YMP  2019-10-11 19:47:16+03  DELTA  STREAM    1/1  10s   376kB  32MB  1.00    0/3E000028  0/3F0000B8  OK
+ node      11       PZ7YK2  2019-10-11 19:45:45+03  FULL   STREAM    1/0  11s   180MB  16MB  1.00    0/3C000028  0/3C000198  OK
+```
+
+## Authors
+Postgres Professional, Moscow, Russia.
+
+## Credits
+pg_probackup utility is based on pg_arman, which was originally written by NTT and then developed and maintained by Michael Paquier.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000..dc4e8b8d5
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,22 @@
+Copyright (c) 2015-2019, Postgres Professional
+Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
+
+Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+Portions Copyright (c) 1994, The Regents of the University of California
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose, without fee, and without a written agreement
+is hereby granted, provided that the above copyright notice and this
+paragraph and the following two paragraphs appear in all copies.
+
+IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR
+DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
+LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
+DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES,
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO
+PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
diff --git a/Makefile b/Makefile index e22bc86cb..41502286b 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,23 @@ PROGRAM = pg_probackup -OBJS = src/backup.o src/catalog.o src/configure.o src/data.o \ - src/delete.o src/dir.o src/fetch.o src/help.o src/init.o \ - src/pg_probackup.o src/restore.o src/show.o src/status.o \ - src/util.o src/validate.o src/datapagemap.o src/parsexlog.o \ - src/xlogreader.o src/streamutil.o src/receivelog.o \ - src/archive.o src/utils/parray.o src/utils/pgut.o src/utils/logger.o \ - src/utils/json.o src/utils/thread.o src/merge.o -EXTRA_CLEAN = src/datapagemap.c src/datapagemap.h src/xlogreader.c \ - src/receivelog.c src/receivelog.h src/streamutil.c src/streamutil.h src/logging.h +# utils +OBJS = src/utils/configuration.o src/utils/json.o src/utils/logger.o \ + src/utils/parray.o src/utils/pgut.o src/utils/thread.o src/utils/remote.o src/utils/file.o -INCLUDES = src/datapagemap.h src/logging.h src/receivelog.h src/streamutil.h +OBJS += src/archive.o src/backup.o src/catalog.o src/checkdb.o src/configure.o src/data.o \ + src/delete.o src/dir.o src/fetch.o src/help.o src/init.o src/merge.o \ + src/parsexlog.o src/ptrack.o src/pg_probackup.o src/restore.o src/show.o src/util.o \ + src/validate.o + +# borrowed files +OBJS += src/pg_crc.o src/datapagemap.o src/receivelog.o src/streamutil.o \ + src/xlogreader.o + +EXTRA_CLEAN = src/pg_crc.c src/datapagemap.c src/datapagemap.h \ + src/receivelog.c src/receivelog.h src/streamutil.c src/streamutil.h \ + src/xlogreader.c src/instr_time.h + +INCLUDES = src/datapagemap.h src/streamutil.h src/receivelog.h src/instr_time.h ifdef USE_PGXS PG_CONFIG = pg_config @@ -32,28 +39,35 @@ else srchome=$(top_srcdir) endif -ifeq ($(MAJORVERSION),10) +#ifneq (,$(filter 9.5 9.6 10 11,$(MAJORVERSION))) +ifneq (12,$(MAJORVERSION)) +EXTRA_CLEAN += src/logging.h +INCLUDES += src/logging.h +endif + +ifeq (,$(filter 9.5 9.6,$(MAJORVERSION))) OBJS += src/walmethods.o EXTRA_CLEAN += src/walmethods.c src/walmethods.h INCLUDES += src/walmethods.h endif -PG_CPPFLAGS = -I$(libpq_srcdir) ${PTHREAD_CFLAGS} -Isrc + +PG_CPPFLAGS = -I$(libpq_srcdir) ${PTHREAD_CFLAGS} -Isrc -I$(top_srcdir)/$(subdir)/src override CPPFLAGS := -DFRONTEND $(CPPFLAGS) $(PG_CPPFLAGS) -PG_LIBS = $(libpq_pgport) ${PTHREAD_CFLAGS} +PG_LIBS_INTERNAL = $(libpq_pgport) ${PTHREAD_CFLAGS} all: checksrcdir $(INCLUDES); $(PROGRAM): $(OBJS) -src/xlogreader.c: $(top_srcdir)/src/backend/access/transam/xlogreader.c - rm -f $@ && $(LN_S) $(srchome)/src/backend/access/transam/xlogreader.c $@ +src/instr_time.h: $(top_srcdir)/src/include/portability/instr_time.h + rm -f $@ && $(LN_S) $(srchome)/src/include/portability/instr_time.h $@ src/datapagemap.c: $(top_srcdir)/src/bin/pg_rewind/datapagemap.c rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_rewind/datapagemap.c $@ src/datapagemap.h: $(top_srcdir)/src/bin/pg_rewind/datapagemap.h rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_rewind/datapagemap.h $@ -src/logging.h: $(top_srcdir)/src/bin/pg_rewind/logging.h - rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_rewind/logging.h $@ +src/pg_crc.c: $(top_srcdir)/src/backend/utils/hash/pg_crc.c + rm -f $@ && $(LN_S) $(srchome)/src/backend/utils/hash/pg_crc.c $@ src/receivelog.c: $(top_srcdir)/src/bin/pg_basebackup/receivelog.c rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_basebackup/receivelog.c $@ src/receivelog.h: $(top_srcdir)/src/bin/pg_basebackup/receivelog.h @@ -62,9 +76,16 @@ src/streamutil.c: $(top_srcdir)/src/bin/pg_basebackup/streamutil.c rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_basebackup/streamutil.c $@ 
src/streamutil.h: $(top_srcdir)/src/bin/pg_basebackup/streamutil.h rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_basebackup/streamutil.h $@ +src/xlogreader.c: $(top_srcdir)/src/backend/access/transam/xlogreader.c + rm -f $@ && $(LN_S) $(srchome)/src/backend/access/transam/xlogreader.c $@ +#ifneq (,$(filter 9.5 9.6 10 11,$(MAJORVERSION))) +ifneq (12,$(MAJORVERSION)) +src/logging.h: $(top_srcdir)/src/bin/pg_rewind/logging.h + rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_rewind/logging.h $@ +endif -ifeq ($(MAJORVERSION),10) +ifeq (,$(filter 9.5 9.6,$(MAJORVERSION))) src/walmethods.c: $(top_srcdir)/src/bin/pg_basebackup/walmethods.c rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_basebackup/walmethods.c $@ src/walmethods.h: $(top_srcdir)/src/bin/pg_basebackup/walmethods.h diff --git a/README.md b/README.md index 1471d648f..7827f8b64 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,29 @@ +[![Build Status](https://travis-ci.com/postgrespro/pg_probackup.svg?branch=master)](https://travis-ci.com/postgrespro/pg_probackup) +[![GitHub release](https://img.shields.io/github/v/release/postgrespro/pg_probackup?include_prereleases)](https://github.com/postgrespro/pg_probackup/releases/latest) + # pg_probackup `pg_probackup` is a utility to manage backup and recovery of PostgreSQL database clusters. It is designed to perform periodic backups of the PostgreSQL instance that enable you to restore the server in case of a failure. The utility is compatible with: -* PostgreSQL 9.5, 9.6, 10; - -`PTRACK` backup support provided via following options: -* vanilla PostgreSQL compiled with ptrack patch. Currently there are patches for [PostgreSQL 9.6](https://gist.githubusercontent.com/gsmol/5b615c971dfd461c76ef41a118ff4d97/raw/e471251983f14e980041f43bea7709b8246f4178/ptrack_9.6.6_v1.5.patch) and [PostgreSQL 10](https://gist.githubusercontent.com/gsmol/be8ee2a132b88463821021fd910d960e/raw/de24f9499f4f314a4a3e5fae5ed4edb945964df8/ptrack_10.1_v1.5.patch) -* Postgres Pro Standard 9.5, 9.6 -* Postgres Pro Enterprise +* PostgreSQL 9.5, 9.6, 10, 11, 12; As compared to other backup solutions, `pg_probackup` offers the following benefits that can help you implement different backup strategies and deal with large amounts of data: -* Choosing between full and page-level incremental backups to speed up backup and recovery -* Implementing a single backup strategy for multi-server PostgreSQL clusters -* Automatic data consistency checks and on-demand backup validation without actual data recovery -* Managing backups in accordance with retention policy -* Running backup, restore, and validation processes on multiple parallel threads -* Storing backup data in a compressed state to save disk space -* Taking backups from a standby server to avoid extra load on the master server -* Extended logging settings -* Custom commands to simplify WAL log archiving +* Incremental backup: page-level incremental backup allows you to save disk space, speed up backup and restore. With three different incremental modes, you can plan the backup strategy in accordance with your data flow. +* Incremental restore: page-level incremental restore allows you dramatically speed up restore by reusing valid unchanged pages in destination directory. +* Merge: using this feature allows you to implement "incrementally updated backups" strategy, eliminating the need to do periodical full backups. 
+* Validation: automatic data consistency checks and on-demand backup validation without actual data recovery +* Verification: on-demand verification of PostgreSQL instance with the `checkdb` command. +* Retention: managing WAL archive and backups in accordance with retention policy. You can configure retention policy based on recovery time or the number of backups to keep, as well as specify `time to live` (TTL) for a particular backup. Expired backups can be merged or deleted. +* Parallelization: running backup, restore, merge, delete, verificaton and validation processes on multiple parallel threads +* Compression: storing backup data in a compressed state to save disk space +* Deduplication: saving disk space by not copying unchanged non-data files, such as `_vm` or `_fsm` +* Remote operations: backing up PostgreSQL instance located on a remote system or restoring a backup remotely +* Backup from standby: avoid extra load on master by taking backups from a standby server +* External directories: backing up files and directories located outside of the PostgreSQL `data directory` (PGDATA), such as scripts, configuration files, logs, or SQL dump files. +* Backup Catalog: get list of backups and corresponding meta information in plain text or JSON formats +* Archive catalog: getting the list of all WAL timelines and the corresponding meta information in plain text or JSON formats +* Partial Restore: restore only the specified databases or exclude the specified databases from restore. To manage backup data, `pg_probackup` creates a backup catalog. This directory stores all backup files with additional meta information, as well as WAL archives required for [point-in-time recovery](https://postgrespro.com/docs/postgresql/current/continuous-archiving.html). You can store backups for different instances in separate subdirectories of a single backup catalog. @@ -30,62 +34,158 @@ Using `pg_probackup`, you can take full or incremental backups: * `DELTA` backup. In this mode, `pg_probackup` read all data files in PGDATA directory and only those pages, that where changed since previous backup, are copied. Continuous archiving is not necessary for it to operate. Also this mode could impose read-only I/O pressure equal to `Full` backup. * `PTRACK` backup. In this mode, PostgreSQL tracks page changes on the fly. Continuous archiving is not necessary for it to operate. Each time a relation page is updated, this page is marked in a special `PTRACK` bitmap for this relation. As one page requires just one bit in the `PTRACK` fork, such bitmaps are quite small. Tracking implies some minor overhead on the database server operation, but speeds up incremental backups significantly. -Regardless of the chosen backup type, all backups taken with `pg_probackup` support the following archiving strategies: -* `Autonomous backups` include all the files required to restore the cluster to a consistent state at the time the backup was taken. Even if continuous archiving is not set up, the required WAL segments are included into the backup. -* `Archive backups` rely on continuous archiving. Such backups enable cluster recovery to an arbitrary point after the backup was taken (point-in-time recovery). +Regardless of the chosen backup type, all backups taken with `pg_probackup` support the following strategies of WAL delivery: +* `Autonomous backups` streams via replication protocol all the WAL files required to restore the cluster to a consistent state at the time the backup was taken. 
Even if continuous archiving is not set up, the required WAL segments are included into the backup. +* `Archive backups` rely on continuous archiving. + +## ptrack support + +`PTRACK` backup support provided via following options: +* vanilla PostgreSQL compiled with ptrack patch. Currently there are patches for [PostgreSQL 9.6](https://gist.githubusercontent.com/gsmol/5b615c971dfd461c76ef41a118ff4d97/raw/e471251983f14e980041f43bea7709b8246f4178/ptrack_9.6.6_v1.5.patch) and [PostgreSQL 10](https://gist.githubusercontent.com/gsmol/be8ee2a132b88463821021fd910d960e/raw/de24f9499f4f314a4a3e5fae5ed4edb945964df8/ptrack_10.1_v1.5.patch) +* vanilla PostgreSQL 12 with [ptrack extension](https://github.com/postgrespro/ptrack) +* Postgres Pro Standard 9.6, 10, 11, 12 +* Postgres Pro Enterprise 9.6, 10, 11, 12 ## Limitations `pg_probackup` currently has the following limitations: -* Creating backups from a remote server is currently not supported. -* The server from which the backup was taken and the restored server must be compatible by the [block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#guc-block-size) and [wal_block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#guc-wal-block-size) parameters and have the same major release number. -* Microsoft Windows operating system is not supported. -* Configuration files outside of PostgreSQL data directory are not included into the backup and should be backed up separately. +* The server from which the backup was taken and the restored server must be compatible by the [block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#GUC-BLOCK-SIZE) and [wal_block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#GUC-WAL-BLOCK-SIZE) parameters and have the same major release number. +* Remote backup via ssh on Windows currently is not supported. +* When running remote operations via ssh, remote and local pg_probackup versions must be the same. + +## Documentation + +Documentation can be found at [github](https://postgrespro.github.io/pg_probackup) and [Postgres Professional documentation](https://postgrespro.com/docs/postgrespro/current/app-pgprobackup) ## Installation and Setup +### Windows Installation +Installers are available in release **assets**. [Latests](https://github.com/postgrespro/pg_probackup/releases/2.4.1). 
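+
+Whichever installation method you choose, you can check afterwards which pg_probackup build ended up on your `PATH` (a minimal sanity check; the `-12` suffix below is only an example, since package names carry the PostgreSQL major version):
+
+```shell
+pg_probackup-12 version
+```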
+ ### Linux Installation +#### pg_probackup for vanilla PostgreSQL ```shell #DEB Ubuntu|Debian Packages -echo "deb [arch=amd64] http://repo.postgrespro.ru/pg_probackup/deb/ $(lsb_release -cs) main-$(lsb_release -cs)" > /etc/apt/sources.list.d/pg_probackup.list -wget -O - http://repo.postgrespro.ru/pg_probackup/keys/GPG-KEY-PG_PROBACKUP | apt-key add - && apt-get update -apt-get install pg-probackup-{10,9.6,9.5} +sudo echo "deb [arch=amd64] https://repo.postgrespro.ru/pg_probackup/deb/ $(lsb_release -cs) main-$(lsb_release -cs)" > /etc/apt/sources.list.d/pg_probackup.list +sudo wget -O - https://repo.postgrespro.ru/pg_probackup/keys/GPG-KEY-PG_PROBACKUP | sudo apt-key add - && sudo apt-get update +sudo apt-get install pg-probackup-{12,11,10,9.6,9.5} +sudo apt-get install pg-probackup-{12,11,10,9.6,9.5}-dbg #DEB-SRC Packages -echo "deb-src [arch=amd64] http://repo.postgrespro.ru/pg_probackup/deb/ $(lsb_release -cs) main-$(lsb_release -cs)" >>\ - /etc/apt/sources.list.d/pg_probackup.list -apt-get source pg-probackup-{10,9.6,9.5} +sudo echo "deb-src [arch=amd64] https://repo.postgrespro.ru/pg_probackup/deb/ $(lsb_release -cs) main-$(lsb_release -cs)" >>\ + /etc/apt/sources.list.d/pg_probackup.list && sudo apt-get update +sudo apt-get source pg-probackup-{12,11,10,9.6,9.5} #RPM Centos Packages -rpm -ivh http://repo.postgrespro.ru/pg_probackup/keys/pg_probackup-repo-centos.noarch.rpm -yum install pg_probackup-{10,9.6,9.5} +rpm -ivh https://repo.postgrespro.ru/pg_probackup/keys/pg_probackup-repo-centos.noarch.rpm +yum install pg_probackup-{12,11,10,9.6,9.5} +yum install pg_probackup-{12,11,10,9.6,9.5}-debuginfo #RPM RHEL Packages -rpm -ivh http://repo.postgrespro.ru/pg_probackup/keys/pg_probackup-repo-rhel.noarch.rpm -yum install pg_probackup-{10,9.6,9.5} +rpm -ivh https://repo.postgrespro.ru/pg_probackup/keys/pg_probackup-repo-rhel.noarch.rpm +yum install pg_probackup-{12,11,10,9.6,9.5} +yum install pg_probackup-{12,11,10,9.6,9.5}-debuginfo #RPM Oracle Linux Packages -rpm -ivh http://repo.postgrespro.ru/pg_probackup/keys/pg_probackup-repo-oraclelinux.noarch.rpm -yum install pg_probackup-{10,9.6,9.5} +rpm -ivh https://repo.postgrespro.ru/pg_probackup/keys/pg_probackup-repo-oraclelinux.noarch.rpm +yum install pg_probackup-{12,11,10,9.6,9.5} +yum install pg_probackup-{12,11,10,9.6,9.5}-debuginfo #SRPM Packages -yumdownloader --source pg_probackup-{10,9.6,9.5} +yumdownloader --source pg_probackup-{12,11,10,9.6,9.5} + +#RPM ALT Linux 7 +sudo echo "rpm https://repo.postgrespro.ru/pg_probackup/rpm/latest/altlinux-p7 x86_64 vanilla" > /etc/apt/sources.list.d/pg_probackup.list +sudo apt-get update +sudo apt-get install pg_probackup-{12,11,10,9.6,9.5} +sudo apt-get install pg_probackup-{12,11,10,9.6,9.5}-debuginfo + +#RPM ALT Linux 8 +sudo echo "rpm https://repo.postgrespro.ru/pg_probackup/rpm/latest/altlinux-p8 x86_64 vanilla" > /etc/apt/sources.list.d/pg_probackup.list +sudo apt-get update +sudo apt-get install pg_probackup-{12,11,10,9.6,9.5} +sudo apt-get install pg_probackup-{12,11,10,9.6,9.5}-debuginfo + +#RPM ALT Linux 9 +sudo echo "rpm https://repo.postgrespro.ru/pg_probackup/rpm/latest/altlinux-p9 x86_64 vanilla" > /etc/apt/sources.list.d/pg_probackup.list +sudo apt-get 
update +sudo apt-get install pg_probackup-{12,11,10,9.6,9.5} +sudo apt-get install pg_probackup-{12,11,10,9.6,9.5}-debuginfo ``` -To compile `pg_probackup`, you must have a PostgreSQL installation and raw source tree. To install `pg_probackup`, execute this in the module's directory: +#### pg_probackup for PostgresPro Standard and Enterprise +```shell +#DEB Ubuntu|Debian Packages +sudo echo "deb [arch=amd64] https://repo.postgrespro.ru/pg_probackup-forks/deb/ $(lsb_release -cs) main-$(lsb_release -cs)" > /etc/apt/sources.list.d/pg_probackup-forks.list +sudo wget -O - https://repo.postgrespro.ru/pg_probackup-forks/keys/GPG-KEY-PG_PROBACKUP | sudo apt-key add - && sudo apt-get update +sudo apt-get install pg-probackup-{std,ent}-{12,11,10,9.6} +sudo apt-get install pg-probackup-{std,ent}-{12,11,10,9.6}-dbg + +#RPM Centos Packages +rpm -ivh https://repo.postgrespro.ru/pg_probackup-forks/keys/pg_probackup-repo-forks-centos.noarch.rpm +yum install pg_probackup-{std,ent}-{12,11,10,9.6} +yum install pg_probackup-{std,ent}-{12,11,10,9.6}-debuginfo + +#RPM RHEL Packages +rpm -ivh https://repo.postgrespro.ru/pg_probackup-forks/keys/pg_probackup-repo-forks-rhel.noarch.rpm +yum install pg_probackup-{std,ent}-{12,11,10,9.6} +yum install pg_probackup-{std,ent}-{12,11,10,9.6}-debuginfo + +#RPM Oracle Linux Packages +rpm -ivh https://repo.postgrespro.ru/pg_probackup-forks/keys/pg_probackup-repo-forks-oraclelinux.noarch.rpm +yum install pg_probackup-{std,ent}-{12,11,10,9.6} +yum install pg_probackup-{std,ent}-{12,11,10,9.6}-debuginfo + +#RPM ALT Linux 7 +sudo echo "rpm https://repo.postgrespro.ru/pg_probackup-forks/rpm/latest/altlinux-p7 x86_64 forks" > /etc/apt/sources.list.d/pg_probackup_forks.list +sudo apt-get update +sudo apt-get install pg_probackup-{std,ent}-{12,11,10,9.6} +sudo apt-get install pg_probackup-{std,ent}-{12,11,10,9.6}-debuginfo + +#RPM ALT Linux 8 +sudo echo "rpm https://repo.postgrespro.ru/pg_probackup-forks/rpm/latest/altlinux-p8 x86_64 forks" > /etc/apt/sources.list.d/pg_probackup_forks.list +sudo apt-get update +sudo apt-get install pg_probackup-{std,ent}-{12,11,10,9.6} +sudo apt-get install pg_probackup-{std,ent}-{12,11,10,9.6}-debuginfo + +#RPM ALT Linux 9 +sudo echo "rpm https://repo.postgrespro.ru/pg_probackup-forks/rpm/latest/altlinux-p9 x86_64 forks" > /etc/apt/sources.list.d/pg_probackup_forks.list && sudo apt-get update +sudo apt-get install pg_probackup-{std,ent}-{12,11,10,9.6} +sudo apt-get install pg_probackup-{std,ent}-{12,11,10,9.6}-debuginfo +``` + +Once you have `pg_probackup` installed, complete [the setup](https://github.com/postgrespro/pg_probackup/blob/master/Documentation.md#installation-and-setup). + +## Building from source +### Linux + +To compile `pg_probackup`, you must have a PostgreSQL installation and raw source tree. Execute this in the module's directory: ```shell make USE_PGXS=1 PG_CONFIG= top_srcdir= ``` -Once you have `pg_probackup` installed, complete [the setup](https://postgrespro.com/docs/postgrespro/current/app-pgprobackup.html#pg-probackup-install-and-setup). +The alternative way, without using the PGXS infrastructure, is to place `pg_probackup` source directory into `contrib` directory and build it there. 
Example:
-## Documentation
+```shell
+cd && git clone https://github.com/postgrespro/pg_probackup contrib/pg_probackup && cd contrib/pg_probackup && make
+```
+
+### Windows
-Currently the latest documentation can be found at [Postgres Pro Enterprise documentation](https://postgrespro.com/docs/postgrespro/current/app-pgprobackup).
+Currently pg_probackup can be built only with MSVC 2013.
+Build PostgreSQL using [pgwininstall](https://github.com/postgrespro/pgwininstall) or the [PostgreSQL instructions](https://www.postgresql.org/docs/10/install-windows-full.html) with MSVC 2013.
+If zlib support is needed, src/tools/msvc/config.pl must contain the path to the directory with compiled zlib. [Example](https://gist.githubusercontent.com/gsmol/80989f976ce9584824ae3b1bfb00bd87/raw/240032950d4ac4801a79625dd00c8f5d4ed1180c/gistfile1.txt)
+
+```shell
+CALL "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall" amd64
+SET PATH=%PATH%;C:\Perl64\bin
+SET PATH=%PATH%;C:\msys64\usr\bin
+gen_probackup_project.pl C:\path_to_postgresql_source_tree
+```

-## Licence
+## License

-This module available under the same license as [PostgreSQL](https://www.postgresql.org/about/licence/).
+This module is available under the [license](LICENSE) similar to [PostgreSQL](https://www.postgresql.org/about/license/).

 ## Feedback
@@ -97,4 +197,4 @@ Postgres Professional, Moscow, Russia.

 ## Credits

-`pg_probackup` utility is based on `pg_arman`, that was originally written by NTT and then developed and maintained by Michael Paquier.
\ No newline at end of file
+`pg_probackup` utility is based on `pg_arman`, which was originally written by NTT and then developed and maintained by Michael Paquier.
diff --git a/doc/Readme.md b/doc/Readme.md new file mode 100644 index 000000000..b9c74769e --- /dev/null +++ b/doc/Readme.md @@ -0,0 +1,5 @@ +# Generating documentation +``` +xmllint --noout --valid probackup.xml +xsltproc stylesheet.xsl probackup.xml >pg-probackup.html +``` \ No newline at end of file diff --git a/doc/pgprobackup.xml b/doc/pgprobackup.xml new file mode 100644 index 000000000..26211b30e --- /dev/null +++ b/doc/pgprobackup.xml @@ -0,0 +1,5641 @@ + + + + + pg_probackup + + + + pg_probackup + 1 + Application + + + + pg_probackup + manage backup and recovery of PostgreSQL database clusters + + + + + + pg_probackup + + + + pg_probackup + + command + + + pg_probackup + + backup_dir + + + pg_probackup + + backup_dir + data_dir + instance_name + + + pg_probackup + + backup_dir + instance_name + + + pg_probackup + + backup_dir + instance_name + option + + + pg_probackup + + backup_dir + instance_name + backup_id + option + + + pg_probackup + + backup_dir + instance_name + + + + pg_probackup + + backup_dir + option + + + pg_probackup + + backup_dir + instance_name + backup_mode + option + + + pg_probackup + + backup_dir + instance_name + option + + + pg_probackup + + backup_dir + instance_name + data_dir + + + + pg_probackup + + backup_dir + option + + + pg_probackup + + backup_dir + instance_name + backup_id + option + + + pg_probackup + + backup_dir + instance_name + + backup_id + + + + + option + + + pg_probackup + + backup_dir + instance_name + wal_file_name + option + + + pg_probackup + + backup_dir + instance_name + wal_file_path + wal_file_name + option + + + + + + + Description + + + pg_probackup is a utility to manage backup and + recovery of PostgreSQL database clusters. + It is designed to perform periodic backups of the PostgreSQL + instance that enable you to restore the server in case of a failure. + pg_probackup supports PostgreSQL 9.5 or higher. + + + + + Overview + + + Installation and Setup + + + Command-Line Reference + + + Usage + + + + + + + + Overview + + + + As compared to other backup solutions, pg_probackup offers the + following benefits that can help you implement different backup + strategies and deal with large amounts of data: + + + + + Incremental backup: with three different incremental modes, + you can plan the backup strategy in accordance with your data flow. + Incremental backups allow you to save disk space + and speed up backup as compared to taking full backups. + It is also faster to restore the cluster by applying incremental + backups than by replaying WAL files. + + + + + Incremental restore: speed up restore from backup by reusing + valid unchanged pages available in PGDATA. + + + + + Validation: automatic data consistency checks and on-demand + backup validation without actual data recovery. + + + + + Verification: on-demand verification of PostgreSQL instance + with the checkdb command. + + + + + Retention: managing WAL archive and backups in accordance with + retention policy. You can configure retention policy based on recovery time + or the number of backups to keep, as well as specify time to live (TTL) + for a particular backup. Expired backups can be merged or deleted. + + + + + Parallelization: running backup, + restore, merge, + delete, validate, + and checkdb processes on multiple parallel threads. + + + + + Compression: storing backup data in a compressed state to save + disk space. 
+ + + + + Deduplication: saving disk space by excluding non-data + files (such as _vm or _fsm) + from incremental backups if these files have not changed since + they were copied into one of the previous backups in this incremental chain. + + + + + Remote operations: backing up PostgreSQL + instance located on a remote system or restoring a backup remotely. + + + + + Backup from standby: avoiding extra load on master by + taking backups from a standby server. + + + + + External directories: backing up files and directories + located outside of the PostgreSQL data + directory (PGDATA), such as scripts, configuration + files, logs, or SQL dump files. + + + + + Backup catalog: getting the list of backups and the corresponding meta + information in plain text or + JSON formats. + + + + + Archive catalog: getting the list of all WAL timelines and + the corresponding meta information in plain text or + JSON formats. + + + + + Partial restore: restoring only the specified databases. + + + + + To manage backup data, pg_probackup creates a + backup catalog. This is a directory that stores + all backup files with additional meta information, as well as WAL + archives required for point-in-time recovery. You can store + backups for different instances in separate subdirectories of a + single backup catalog. + + + Using pg_probackup, you can take full or incremental + backups: + + + + + FULL backups contain all the data files required to restore + the database cluster. + + + + + Incremental backups operate at the page level, only storing the data that has changed since + the previous backup. It allows you to save disk space + and speed up the backup process as compared to taking full backups. + It is also faster to restore the cluster by applying incremental + backups than by replaying WAL files. pg_probackup supports + the following modes of incremental backups: + + + + + DELTA backup. In this mode, pg_probackup reads all data + files in the data directory and copies only those pages + that have changed since the previous backup. This + mode can impose read-only I/O pressure equal to a full + backup. + + + + + PAGE backup. In this mode, pg_probackup scans all WAL + files in the archive from the moment the previous full or + incremental backup was taken. Newly created backups + contain only the pages that were mentioned in WAL records. + This requires all the WAL files since the previous backup + to be present in the WAL archive. If the size of these + files is comparable to the total size of the database + cluster files, speedup is smaller, but the backup still + takes less space. You have to configure WAL archiving as + explained in Setting + up continuous WAL archiving to make PAGE backups. + + + + + PTRACK backup. In this mode, PostgreSQL tracks page + changes on the fly. Continuous archiving is not necessary + for it to operate. Each time a relation page is updated, + this page is marked in a special PTRACK bitmap. Tracking implies some + minor overhead on the database server operation, but + speeds up incremental backups significantly. + + + + + + + pg_probackup can take only physical online backups, and online + backups require WAL for consistent recovery. So regardless of the + chosen backup mode (FULL, PAGE or DELTA), any backup taken with + pg_probackup must use one of the following + WAL delivery modes: + + + + + ARCHIVE. Such backups rely + on + continuous + archiving to ensure consistent recovery. This is the + default WAL delivery mode. + + + + + STREAM. 
Such backups + include all the files required to restore the cluster to a + consistent state at the time the backup was taken. Regardless + of + continuous + archiving having been set up or not, the WAL segments required + for consistent recovery are streamed via + replication protocol during backup and included into the + backup files. That's why such backups are called + autonomous, or standalone. + + + + + Limitations + + pg_probackup currently has the following limitations: + + + + pg_probackup only supports PostgreSQL 9.5 and higher. + + + + + The remote mode is not supported on Windows systems. + + + + + On Unix systems, for PostgreSQL 10 or higher, + a backup can be made only by the same OS user that has started the PostgreSQL + server. For example, if PostgreSQL server is started by + user postgres, the backup command must also be run + by user postgres. To satisfy this requirement when taking backups in the + remote mode using SSH, you must set + option to postgres. + + + + + For PostgreSQL 9.5, functions + pg_create_restore_point(text) and + pg_switch_xlog() can be executed only if + the backup role is a superuser, so backup of a + cluster with low amount of WAL traffic by a non-superuser + role can take longer than the backup of the same cluster by + a superuser role. + + + + + The PostgreSQL server from which the backup was taken and + the restored server must be compatible by the + block_size + and + wal_block_size + parameters and have the same major release number. + Depending on cluster configuration, PostgreSQL itself may + apply additional restrictions, such as CPU architecture + or libc/libicu versions. + + + + + + + + + Installation and Setup + + Once you have pg_probackup installed, complete the following + setup: + + + + + Initialize the backup catalog. + + + + + Add a new backup instance to the backup catalog. + + + + + Configure the database cluster to enable pg_probackup backups. + + + + + Optionally, configure SSH for running pg_probackup operations + in the remote mode. + + + + + Initializing the Backup Catalog + + pg_probackup stores all WAL and backup files in the + corresponding subdirectories of the backup catalog. + + + To initialize the backup catalog, run the following command: + + +pg_probackup init -B backup_dir + + + where backup_dir is the path to the backup + catalog. If the backup_dir already exists, + it must be empty. Otherwise, pg_probackup returns an error. + + + The user launching pg_probackup must have full access to + the backup_dir directory. + + + pg_probackup creates the backup_dir backup + catalog, with the following subdirectories: + + + + + wal/ — directory for WAL files. + + + + + backups/ — directory for backup files. + + + + + Once the backup catalog is initialized, you can add a new backup + instance. + + + + Adding a New Backup Instance + + pg_probackup can store backups for multiple database clusters in + a single backup catalog. To set up the required subdirectories, + you must add a backup instance to the backup catalog for each + database cluster you are going to back up. + + + To add a new backup instance, run the following command: + + +pg_probackup add-instance -B backup_dir -D data_dir --instance instance_name [remote_options] + + + where: + + + + + data_dir is the data directory of the + cluster you are going to back up. To set up and use + pg_probackup, write access to this directory is required. + + + + + instance_name is the name of the + subdirectories that will store WAL and backup files for this + cluster. 
+ + + + + remote_options + are optional parameters that need to be specified only if + data_dir is located + on a remote system. + + + + + pg_probackup creates the instance_name + subdirectories under the backups/ and wal/ directories of + the backup catalog. The + backups/instance_name directory contains + the pg_probackup.conf configuration file that controls + pg_probackup settings for this backup instance. If you run this + command with the + remote_options, the specified + parameters will be added to pg_probackup.conf. + + + For details on how to fine-tune pg_probackup configuration, see + . + + + The user launching pg_probackup must have full access to + backup_dir directory and at least read-only + access to data_dir directory. If you + specify the path to the backup catalog in the + BACKUP_PATH environment variable, you can + omit the corresponding option when running pg_probackup + commands. + + + + For PostgreSQL 11 or higher, it is recommended to use the + allow-group-access + feature, so that backup can be done by any OS user in the same + group as the cluster owner. In this case, the user should have + read permissions for the cluster directory. + + + + + Configuring the Database Cluster + + Although pg_probackup can be used by a superuser, it is + recommended to create a separate role with the minimum + permissions required for the chosen backup strategy. In these + configuration instructions, the backup role + is used as an example. + + + To perform a , the following + permissions for role backup are required + only in the database used for + connection to the PostgreSQL server: + + + For PostgreSQL 9.5: + + +BEGIN; +CREATE ROLE backup WITH LOGIN; +GRANT USAGE ON SCHEMA pg_catalog TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_xlog() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup; +COMMIT; + + + For PostgreSQL 9.6: + + +BEGIN; +CREATE ROLE backup WITH LOGIN; +GRANT USAGE ON SCHEMA pg_catalog TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_xlog() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_xlog_replay_location() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_checkpoint() TO backup; +COMMIT; + + + For PostgreSQL 10 or higher: + + +BEGIN; +CREATE ROLE backup WITH LOGIN; +GRANT USAGE ON SCHEMA pg_catalog TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; +GRANT EXECUTE ON 
FUNCTION pg_catalog.pg_is_in_recovery() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean, boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_wal() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_wal_replay_lsn() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_checkpoint() TO backup; +COMMIT; + + + In the + pg_hba.conf + file, allow connection to the database cluster on behalf of the + backup role. + + + Since pg_probackup needs to read cluster files directly, + pg_probackup must be started by (or connected to, + if used in the remote mode) the OS user that has read access to all files and + directories inside the data directory (PGDATA) you are going to + back up. + + + Depending on whether you plan to take + standalone or + archive backups, PostgreSQL + cluster configuration will differ, as specified in the sections + below. To back up the database cluster from a standby server, + run pg_probackup in the remote mode, or create PTRACK backups, + additional setup is required. + + + For details, see the sections + Setting up STREAM + Backups, + Setting up + continuous WAL archiving, + Setting up Backup + from Standby, + Configuring the + Remote Mode, + Setting up Partial + Restore, and + Setting up PTRACK + Backups. + + + + Setting up STREAM Backups + + To set up the cluster for + STREAM backups, complete the + following steps: + + + + + Grant the REPLICATION privilege to the backup role: + + +ALTER ROLE backup WITH REPLICATION; + + + + + In the + pg_hba.conf + file, allow replication on behalf of the + backup role. + + + + + Make sure the parameter + max_wal_senders + is set high enough to leave at least one session available + for the backup process. + + + + + Set the parameter + wal_level + to be higher than minimal. + + + + + If you are planning to take PAGE backups in the STREAM mode or + perform PITR with STREAM backups, you still have to configure + WAL archiving, as explained in the section + Setting up + continuous WAL archiving. + + + Once these steps are complete, you can start taking FULL, PAGE, + DELTA, and PTRACK backups in the + STREAM WAL mode. + + + + If you are planning to rely on + .pgpass + for authentication when running backup in STREAM mode, + then .pgpass must contain credentials for replication database, + used to establish connection via replication protocol. Example: + pghost:5432:replication:backup_user:my_strong_password + + + + + Setting up Continuous WAL Archiving + + Making backups in PAGE backup mode, performing + PITR, + making backups with + ARCHIVE WAL delivery mode and + running incremental backup after timeline switch + require + continuous + WAL archiving to be enabled. To set up continuous + archiving in the cluster, complete the following steps: + + + + + Make sure the + wal_level + parameter is higher than minimal. + + + + + If you are configuring archiving on master, + archive_mode + must be set to on or + always. To perform archiving on standby, + set this parameter to always. 
+ + + + + Set the + archive_command + parameter, as follows: + + +archive_command = 'install_dir/pg_probackup archive-push -B backup_dir --instance instance_name --wal-file-name=%f [remote_options]' + + + + + where install_dir is the + installation directory of the pg_probackup + version you are going to use, backup_dir and + instance_name refer to the already + initialized backup catalog instance for this database cluster, + and remote_options + only need to be specified to archive WAL on a remote host. For details about all + possible archive-push parameters, see the + section . + + + Once these steps are complete, you can start making backups in the + ARCHIVE WAL mode, backups in + the PAGE backup mode, as well as perform + PITR. + + + You can view the current state of the WAL archive using the + command. For details, see + . + + + If you are planning to make PAGE backups and/or backups with + ARCHIVE WAL mode from a + standby server that generates a small amount of WAL traffic, + without long waiting for WAL segment to fill up, consider + setting the + archive_timeout + PostgreSQL parameter on + master. The value of this parameter should be slightly + lower than the setting (5 minutes by default), + so that there is enough time for the rotated + segment to be streamed to standby and sent to WAL archive before the + backup is aborted because of . + + + + Instead of using the + command provided by pg_probackup, you can use + any other tool to set up continuous archiving as long as it delivers WAL segments into + backup_dir/wal/instance_name + directory. If compression is used, it should be + gzip, and .gz suffix in filename is + mandatory. + + + + + Instead of configuring continuous archiving by setting the + archive_mode and archive_command + parameters, you can opt for using the + pg_receivewal + utility. In this case, pg_receivewal -D directory + option should point to + backup_dir/wal/instance_name + directory. pg_probackup supports WAL compression + that can be done by pg_receivewal. + Zero Data Loss archive strategy can be + achieved only by using pg_receivewal. + + + + + Setting up Backup from Standby + + For PostgreSQL 9.6 or higher, pg_probackup can take backups from + a standby server. This requires the following additional setup: + + + + + On the standby server, set the + hot_standby + parameter to on. + + + + + On the master server, set the + full_page_writes + parameter to on. + + + + + To perform standalone backups on standby, complete all steps + in section Setting + up STREAM Backups. + + + + + To perform archive backups on standby, complete all steps in + section + Setting + up continuous WAL archiving. + + + + + Once these steps are complete, you can start taking FULL, PAGE, + DELTA, or PTRACK backups with appropriate WAL delivery mode: + ARCHIVE or STREAM, from the standby server. + + + Backup from the standby server has the following limitations: + + + + + If the standby is promoted to the master during backup, the + backup fails. + + + + + All WAL records required for the backup must contain + sufficient full-page writes. This requires you to enable + full_page_writes on the master, and not + to use tools like pg_compresslog as + archive_command + to remove full-page writes from WAL files. + + + + + + Setting up Cluster Verification + + Logical verification of a database cluster requires the following + additional setup. 
Role backup is used as an + example: + + + + + Install the + amcheck + or + amcheck_next extension + in every database of the + cluster: + + +CREATE EXTENSION amcheck; + + + + + Grant the following permissions to the backup + role in every database of the cluster: + + + + +GRANT SELECT ON TABLE pg_catalog.pg_am TO backup; +GRANT SELECT ON TABLE pg_catalog.pg_class TO backup; +GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; +GRANT SELECT ON TABLE pg_catalog.pg_namespace TO backup; +GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; +GRANT EXECUTE ON FUNCTION bt_index_check(regclass) TO backup; +GRANT EXECUTE ON FUNCTION bt_index_check(regclass, bool) TO backup; + + + + Setting up Partial Restore + + If you are planning to use partial restore, complete the + following additional step: + + + + + Grant the read-only access to pg_catalog.pg_database to the + backup role only in the database + used for connection to + PostgreSQL server: + + +GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; + + + + + + Configuring the Remote Mode + + pg_probackup supports the remote mode that allows to perform + backup, restore and WAL archiving operations remotely. In this + mode, the backup catalog is stored on a local system, while + PostgreSQL instance to backup and/or to restore is located on a + remote system. Currently the only supported remote protocol is + SSH. + + + Set up SSH + + If you are going to use pg_probackup in remote mode via SSH, + complete the following steps: + + + + + Install pg_probackup on both systems: + backup_host and + db_host. + + + + + For communication between the hosts set up the passwordless + SSH connection between backup user on + backup_host and + postgres user on + db_host: + + +[backup@backup_host] ssh-copy-id postgres@db_host + + + + + If you are going to rely on + continuous + WAL archiving, set up passwordless SSH + connection between postgres user on + db_host and backup + user on backup_host: + + +[postgres@db_host] ssh-copy-id backup@backup_host + + + + + where: + + + + + backup_host is the system with + backup catalog. + + + + + db_host is the system with PostgreSQL + cluster. + + + + + backup is the OS user on + backup_host used to run pg_probackup. + + + + + postgres is the OS user on + db_host used to start the PostgreSQL + cluster. For PostgreSQL 11 or higher a + more secure approach can be used thanks to + allow-group-access + feature. + + + + + pg_probackup in the remote mode via SSH works + as follows: + + + + + Only the following commands can be launched in the remote + mode: , + , + , + , + . + + + + + Operating in remote mode requires pg_probackup + binary to be installed on both local and remote systems. + The versions of local and remote binary must be the same. + + + + + When started in the remote mode, the main pg_probackup process + on the local system connects to the remote system via SSH and + launches one or more agent processes on the remote system, which are called + remote agents. The number of remote agents + is equal to the / setting. + + + + + The main pg_probackup process uses remote agents to access + remote files and transfer data between local and remote + systems. + + + + + Remote agents try to minimize the network traffic and the number of + round-trips between hosts. + + + + + The main process is usually started on + backup_host and connects to + db_host, but in case of + archive-push and + archive-get commands the main process + is started on db_host and connects to + backup_host. 
+ + + + + Once data transfer is complete, remote agents are + terminated and SSH connections are closed. + + + + + If an error condition is encountered by a remote agent, + then all agents are terminated and error details are + reported by the main pg_probackup process, which exits + with an error. + + + + + Compression is always done on + db_host, while decompression is always done on + backup_host. + + + + + + You can impose + additional + restrictions on SSH settings to protect the system + in the event of account compromise. + + + + + + Setting up PTRACK Backups + + The PTRACK backup mode can be used only for Postgres Pro Standard and + Postgres Pro Enterprise installations, or patched vanilla + PostgreSQL. Links to PTRACK patches can be found + here. + + + If you are going to use PTRACK backups, complete the following + additional steps. The role that will perform PTRACK backups + (the backup role in the examples below) must have + access to all the databases of the cluster. + + + For PostgreSQL 12 or higher: + + + + + Create PTRACK extension: + +CREATE EXTENSION ptrack; + + + + + + To enable tracking page updates, set ptrack.map_size + parameter to a positive integer and restart the server. + + + For optimal performance, it is recommended to set + ptrack.map_size to + N / 1024, where + N is the size of the + PostgreSQL cluster, in MB. If you set this + parameter to a lower value, PTRACK is more likely to map several blocks + together, which leads to false-positive results when tracking changed + blocks and increases the incremental backup size as unchanged blocks + can also be copied into the incremental backup. + Setting ptrack.map_size to a higher value + does not affect PTRACK operation. The maximum allowed value is 1024. + + + + + Grant the right to execute PTRACK + functions to the backup role + in the database used to connect to the cluster: + + +GRANT EXECUTE ON FUNCTION pg_ptrack_get_pagemapset(pg_lsn) TO backup; +GRANT EXECUTE ON FUNCTION pg_ptrack_control_lsn() TO backup; +GRANT EXECUTE ON FUNCTION pg_ptrack_get_block(oid, oid, oid, bigint) TO backup; + + + + + + + If you change the ptrack.map_size parameter value, + the previously created PTRACK map file is cleared, + and tracking newly changed blocks starts from scratch. Thus, you have + to retake a full backup before taking incremental PTRACK backups after + changing ptrack.map_size. + + + + + For older PostgreSQL versions, + PTRACK required taking backups in the exclusive mode + to provide exclusive access to bitmaps with changed blocks. + To set up PTRACK backups for PostgreSQL 11 + or lower, do the following: + + + + + Set the ptrack_enable parameter to + on. + + + + + Grant the right to execute PTRACK + functions to the backup role + in every database of the + cluster: + + +GRANT EXECUTE ON FUNCTION pg_catalog.pg_ptrack_clear() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_ptrack_get_and_clear(oid, oid) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup() TO backup; + + + + + + + + + Usage + + Creating a Backup + + To create a backup, run the following command: + + +pg_probackup backup -B backup_dir --instance instance_name -b backup_mode + + + Where backup_mode can take one of the + following values: + + + + + FULL — creates a full backup that contains all the data + files of the cluster to be restored. + + + + + DELTA — reads all data files in the data directory and + creates an incremental backup for pages that have changed + since the previous backup. 
+ + + + + PAGE — creates an incremental backup based on the WAL + files that have been generated since the previous full or + incremental backup was taken. Only changed blocks are read + from data files. + + + + + PTRACK — creates an incremental backup tracking page + changes on the fly. + + + + + When restoring a cluster from an incremental backup, + pg_probackup relies on the parent full backup and all the + incremental backups between them, which is called + the backup chain. You must create at least + one full backup before taking incremental ones. + + + ARCHIVE Mode + + ARCHIVE is the default WAL delivery mode. + + + For example, to make a FULL backup in ARCHIVE mode, run: + + +pg_probackup backup -B backup_dir --instance instance_name -b FULL + + + ARCHIVE backups rely on + continuous + archiving to get WAL segments required to restore + the cluster to a consistent state at the time the backup was + taken. + + + When a backup is taken, pg_probackup + ensures that WAL files containing WAL records between Start + LSN and Stop LSN actually exist in + backup_dir/wal/instance_name + directory. pg_probackup also ensures that WAL records between + Start LSN and Stop LSN can be parsed. This precaution + eliminates the risk of silent WAL corruption. + + + + STREAM Mode + + STREAM is the optional WAL delivery mode. + + + For example, to make a FULL backup in the STREAM mode, add the + flag to the command from the + previous example: + + +pg_probackup backup -B backup_dir --instance instance_name -b FULL --stream --temp-slot + + + The optional flag ensures that + the required segments remain available if the WAL is rotated + before the backup is complete. + + + Unlike backups in ARCHIVE mode, STREAM backups include all the + WAL segments required to restore the cluster to a consistent + state at the time the backup was taken. + + + During pg_probackup + streams WAL files containing WAL records between Start LSN and + Stop LSN to + backup_dir/backups/instance_name/backup_id/database/pg_wal directory. To eliminate the risk + of silent WAL corruption, pg_probackup also + checks that WAL records between Start LSN and + Stop LSN can be parsed. + + + Even if you are using + continuous + archiving, STREAM backups can still be useful in the + following cases: + + + + + STREAM backups can be restored on the server that has no + file access to WAL archive. + + + + + STREAM backups enable you to restore the cluster state at + the point in time for which WAL files in archive are no + longer available. + + + + + Backup in STREAM mode can be taken from a standby of a + server that generates small amount of WAL traffic, + without long waiting for WAL segment to fill up. + + + + + + Page Validation + + If + data + checksums are enabled in the database cluster, + pg_probackup uses this information to check correctness of + data files during backup. While reading each page, + pg_probackup checks whether the calculated checksum coincides + with the checksum stored in the page header. This guarantees + that the PostgreSQL instance and the backup itself have no + corrupt pages. Note that pg_probackup reads database files + directly from the filesystem, so under heavy write load during + backup it can show false-positive checksum mismatches because of + partial writes. If a page checksum mismatch occurs, the page is + re-read and checksum comparison is repeated. + + + A page is considered corrupt if checksum comparison has failed + more than 100 times. In this case, the backup is aborted. 
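Checksum-based page validation requires data checksums to be enabled in the cluster. If you are not sure whether they are, you can check from any psql session (a quick illustration, not a pg_probackup command):

SHOW data_checksums;

If this returns off, pg_probackup cannot compare page checksums during backup and relies on the page header sanity checks described below.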
+ + + Even if data checksums are not enabled, pg_probackup + always performs sanity checks for page headers. + + + + External Directories + + To back up a directory located outside of the data directory, + use the optional parameter + that specifies the path to this directory. If you would like + to add more than one external directory, you can provide several paths + separated by colons on Linux systems or semicolons on Windows systems. + + + For example, to include /etc/dir1 and + /etc/dir2 directories into the full + backup of your instance_name instance + that will be stored under the backup_dir + directory on Linux, run: + + +pg_probackup backup -B backup_dir --instance instance_name -b FULL --external-dirs=/etc/dir1:/etc/dir2 + + + Similarly, to include C:\dir1 and + C:\dir2 directories into the full backup + on Windows, run: + + +pg_probackup backup -B backup_dir --instance instance_name -b FULL --external-dirs=C:\dir1;C:\dir2 + + + pg_probackup recursively copies the contents + of each external directory into a separate subdirectory in the backup + catalog. Since external directories included into different backups + do not have to be the same, when you are restoring the cluster from an + incremental backup, only those directories that belong to this + particular backup will be restored. Any external directories + stored in the previous backups will be ignored. + + + To include the same directories into each backup of your + instance, you can specify them in the pg_probackup.conf + configuration file using the + command with the + option. + + + + + Performing Cluster Verification + + To verify that PostgreSQL database cluster is + not corrupt, run the following command: + + +pg_probackup checkdb [-B backup_dir [--instance instance_name]] [-D data_dir] [connection_options] + + + + This command performs physical verification of all data files + located in the specified data directory by running page header + sanity checks, as well as block-level checksum verification if checksums are enabled. + If a corrupt page is detected, checkdb + continues cluster verification until all pages in the cluster + are validated. + + + + By default, similar page validation + is performed automatically while a backup is taken by + pg_probackup. The checkdb + command enables you to perform such page validation + on demand, without taking any backup copies, even if the cluster + is not backed up using pg_probackup at all. + + + + To perform cluster verification, pg_probackup + needs to connect to the cluster to be verified. In general, it is + enough to specify the backup instance of this cluster for + pg_probackup to determine the required + connection options. However, if -B and + --instance options are omitted, you have to provide + connection options and + data_dir via environment + variables or command-line options. + + + + Physical verification cannot detect logical inconsistencies, + missing or nullified blocks and entire files, or similar anomalies. Extensions + amcheck + and + amcheck_next + provide a partial solution to these problems. + + + If you would like, in addition to physical verification, to + verify all indexes in all databases using these extensions, you + can specify the flag when running + the command: + + +pg_probackup checkdb -D data_dir --amcheck [connection_options] + + + You can skip physical verification by specifying the + flag. 
In this case, + you can omit backup_dir and + data_dir options, only + connection options are + mandatory: + + +pg_probackup checkdb --amcheck --skip-block-validation [connection_options] + + + Logical verification can be done more thoroughly with the + flag by checking that all heap + tuples that should be indexed are actually indexed, but at the + higher cost of CPU, memory, and I/O consumption. + + + + Validating a Backup + + pg_probackup calculates checksums for each file in a backup + during the backup process. The process of checking checksums of + backup data files is called + the backup validation. By default, validation + is run immediately after the backup is taken and right before the + restore, to detect possible backup corruption. + + + If you would like to skip backup validation, you can specify the + flag when running + and + commands. + + + To ensure that all the required backup files are present and can + be used to restore the database cluster, you can run the + command with the exact + recovery target + options you are going to use for recovery. + + + For example, to check that you can restore the database cluster + from a backup copy up to transaction ID 4242, run + this command: + + +pg_probackup validate -B backup_dir --instance instance_name --recovery-target-xid=4242 + + + If validation completes successfully, pg_probackup displays the + corresponding message. If validation fails, you will receive an + error message with the exact time, transaction ID, and LSN up to + which the recovery is possible. + + + If you specify backup_id via + -i/--backup-id option, then only the backup copy + with specified backup ID will be validated. If + backup_id is specified with + recovery target + options, the validate command will check whether it is possible + to restore the specified backup to the specified + recovery target. + + + For example, to check that you can restore the database cluster + from a backup copy with the PT8XFX backup ID up to the + specified timestamp, run this command: + + +pg_probackup validate -B backup_dir --instance instance_name -i PT8XFX --recovery-target-time='2017-05-18 14:18:11+03' + + + If you specify the backup_id of an incremental backup, + all its parents starting from FULL backup will be + validated. + + + If you omit all the parameters, all backups are validated. + + + + Restoring a Cluster + + To restore the database cluster from a backup, run the + command with at least the following options: + + +pg_probackup restore -B backup_dir --instance instance_name -i backup_id + + + where: + + + + + backup_dir is the backup catalog that + stores all backup files and meta information. + + + + + instance_name is the backup instance + for the cluster to be restored. + + + + + backup_id specifies the backup to + restore the cluster from. If you omit this option, + pg_probackup uses the latest valid backup available for the + specified instance. If you specify an incremental backup to + restore, pg_probackup automatically restores the underlying + full backup and then sequentially applies all the necessary + increments. + + + + + + Once the restore command is complete, start + the database service. + + + + If you restore ARCHIVE backups, + perform PITR, + or specify the --restore-as-replica flag with the + restore command to set up a standby server, + pg_probackup creates a recovery configuration + file once all data files are copied into the target directory. 
This file + includes the minimal settings required for recovery, except for the password in the + primary_conninfo + parameter; you have to add the password manually or use + the --primary-conninfo option, if required. + For PostgreSQL 11 or lower, + recovery settings are written into the recovery.conf + file. Starting from PostgreSQL 12, + pg_probackup writes these settings into + the probackup_recovery.conf file and then includes + it into postgresql.auto.conf. + + + + If you are restoring a STREAM backup, the restore is complete + at once, with the cluster returned to a self-consistent state at + the point when the backup was taken. For ARCHIVE backups, + PostgreSQL replays all available archived WAL + segments, so the cluster is restored to the latest state possible + within the current timeline. You can change this behavior by using the + recovery target + options with the restore command, + as explained in . + + + + If the cluster to restore contains tablespaces, pg_probackup + restores them to their original location by default. To restore + tablespaces to a different location, use the + / option. Otherwise, + restoring the cluster on the same host will fail if tablespaces + are in use, because the backup would have to be written to the + same directories. + + + When using the / + option, you must provide absolute paths to the old and new + tablespace directories. If a path happens to contain an equals + sign (=), escape it with a backslash. This option can be + specified multiple times for multiple tablespaces. For example: + + +pg_probackup restore -B backup_dir --instance instance_name -D data_dir -j 4 -i backup_id -T tablespace1_dir=tablespace1_newdir -T tablespace2_dir=tablespace2_newdir + + + + To restore the cluster on a remote host, follow the instructions in + . + + + + By default, the + command validates the specified backup before restoring the + cluster. If you run regular backup validations and would like + to save time when restoring the cluster, you can specify the + flag to skip validation and + speed up the recovery. + + + + Incremental Restore + + The speed of restore from backup can be significantly improved + by replacing only invalid and changed pages in already + existing PostgreSQL data directory using + incremental + restore options with the + command. + + + To restore the database cluster from a backup in incremental mode, + run the command with the following options: + + +pg_probackup restore -B backup_dir --instance instance_name -D data_dir -I incremental_mode + + + Where incremental_mode can take one of the + following values: + + + + + CHECKSUM — read all data files in the data directory, validate + header and checksum in every page and replace only invalid + pages and those with checksum and LSN not matching with + corresponding page in backup. This is the simplest, + the most fool-proof incremental mode. Recommended to use by default. + + + + + LSN — read the pg_control in the + data directory to obtain redo LSN and redo TLI, which allows + to determine a point in history(shiftpoint), where data directory + state shifted from target backup chain history. If shiftpoint is not within + reach of backup chain history, then restore is aborted. + If shiftpoint is within reach of backup chain history, then read + all data files in the data directory, validate header and checksum in + every page and replace only invalid pages and those with LSN greater + than shiftpoint. 
+ This mode offers a greater speed up compared to CHECKSUM, but rely + on two conditions to be met. First, + + data checksums parameter must be enabled in data directory (to avoid corruption + due to hint bits). This condition will be checked at the start of + incremental restore and the operation will be aborted if checksums are disabled. + Second, the pg_control file must be + synched with state of data directory. This condition cannot checked + at the start of restore, so it is a user responsibility to ensure + that pg_control contain valid information. + Therefore it is not recommended to use LSN mode in any situation, + where pg_control cannot be trusted or has been tampered with: + after pg_resetxlog execution, + after restore from backup without recovery been run, etc. + + + + + NONE — regular restore without any incremental optimizations. + + + + + + Regardless of chosen incremental mode, pg_probackup will check, that postmaster + in given destination directory is not running and system-identifier is + the same as in the backup. + + + + Suppose you want to return an old master as replica after switchover + using incremental restore in LSN mode: + + +============================================================================================================================================= + Instance Version ID Recovery Time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status +============================================================================================================================================= + node 12 QBRNBP 2020-06-11 17:40:58+03 DELTA ARCHIVE 16/15 40s 194MB 16MB 8.26 15/2C000028 15/2D000128 OK + node 12 QBRIDX 2020-06-11 15:51:42+03 PAGE ARCHIVE 15/15 11s 18MB 16MB 5.10 14/DC000028 14/DD0000B8 OK + node 12 QBRIAJ 2020-06-11 15:51:08+03 PAGE ARCHIVE 15/15 20s 141MB 96MB 6.22 14/D4BABFE0 14/DA9871D0 OK + node 12 QBRHT8 2020-06-11 15:45:56+03 FULL ARCHIVE 15/0 2m:11s 1371MB 416MB 10.93 14/9D000028 14/B782E9A0 OK + +pg_probackup restore -B /backup --instance node -R -I lsn +INFO: Running incremental restore into nonempty directory: "/var/lib/pgsql/12/data" +INFO: Destination directory redo point 15/2E000028 on tli 16 is within reach of backup QBRIDX with Stop LSN 14/DD0000B8 on tli 15 +INFO: shift LSN: 14/DD0000B8 +INFO: Restoring the database from backup at 2020-06-11 17:40:58+03 +INFO: Extracting the content of destination directory for incremental restore +INFO: Destination directory content extracted, time elapsed: 1s +INFO: Removing redundant files in destination directory +INFO: Redundant files are removed, time elapsed: 1s +INFO: Start restoring backup files. PGDATA size: 15GB +INFO: Backup files are restored. Transfered bytes: 1693MB, time elapsed: 43s +INFO: Restore incremental ratio (less is better): 11% (1693MB/15GB) +INFO: Restore of backup QBRNBP completed. + + + + Incremental restore is possible only for backups with + program_version equal or greater than 2.4.0. + + + + + Partial Restore + + If you have enabled + partial + restore before taking backups, you can restore + only some of the databases using + partial restore + options with the + commands. + + + To restore the specified databases only, run the command + with the following options: + + +pg_probackup restore -B backup_dir --instance instance_name --db-include=database_name + + + The option can be specified + multiple times. 
For example, to restore only databases + db1 and db2, run the + following command: + + +pg_probackup restore -B backup_dir --instance instance_name --db-include=db1 --db-include=db2 + + + To exclude one or more databases from restore, use + the option: + + +pg_probackup restore -B backup_dir --instance instance_name --db-exclude=database_name + + + The option can be specified + multiple times. For example, to exclude the databases + db1 and db2 from + restore, run the following command: + + +pg_probackup restore -B backup_dir --instance instance_name --db-exclude=db1 --db-exclude=db2 + + + Partial restore relies on lax behavior of PostgreSQL recovery + process toward truncated files. For recovery to work properly, files of excluded databases + are restored as files of zero size. After the PostgreSQL cluster is successfully started, + you must drop the excluded databases using + DROP DATABASE command. + + + + The template0 and + template1 databases are always restored. + + + + + + Performing Point-in-Time (PITR) Recovery + + If you have enabled + continuous + WAL archiving before taking backups, you can restore the + cluster to its state at an arbitrary point in time (recovery + target) using recovery + target options with the + command. + + + + You can use both STREAM and ARCHIVE backups for point in time + recovery as long as the WAL archive is available at least starting + from the time the backup was taken. + If / option is omitted, + pg_probackup automatically chooses the backup that is the + closest to the specified recovery target and starts the restore + process, otherwise pg_probackup will try to restore + the specified backup to the specified recovery target. + + + + + To restore the cluster state at the exact time, specify the + option, in the + timestamp format. For example: + + +pg_probackup restore -B backup_dir --instance instance_name --recovery-target-time='2017-05-18 14:18:11+03' + + + + + To restore the cluster state up to a specific transaction + ID, use the option: + + +pg_probackup restore -B backup_dir --instance instance_name --recovery-target-xid=687 + + + + + To restore the cluster state up to the specific LSN, use + option: + + +pg_probackup restore -B backup_dir --instance instance_name --recovery-target-lsn=16/B374D848 + + + + + To restore the cluster state up to the specific named restore + point, use option: + + +pg_probackup restore -B backup_dir --instance instance_name --recovery-target-name='before_app_upgrade' + + + + + To restore the backup to the latest state available in + the WAL archive, use option + with latest value: + + +pg_probackup restore -B backup_dir --instance instance_name --recovery-target='latest' + + + + + To restore the cluster to the earliest point of consistency, + use option with the + immediate value: + + +pg_probackup restore -B backup_dir --instance instance_name --recovery-target='immediate' + + + + + + Using <application>pg_probackup</application> in the Remote Mode + + pg_probackup supports the remote mode that allows to perform + backup and restore + operations remotely via SSH. In this mode, the backup catalog is + stored on a local system, while PostgreSQL instance to be backed + up is located on a remote system. You must have pg_probackup + installed on both systems. + + + + pg_probackup relies on passwordless SSH connection + for communication between the hosts. + + + + The typical workflow is as follows: + + + + + On your backup host, configure pg_probackup as explained in + the section + Installation and + Setup. 
For the + and + commands, make + sure to specify remote + options that point to the database host with the + PostgreSQL instance. + + + + + If you would like to take remote backups in + PAGE mode, or rely + on ARCHIVE WAL delivery + mode, or use + PITR, + configure continuous WAL archiving from the database host + to the backup host as explained in the section + Setting + up continuous WAL archiving. For the + and + commands, you + must specify the remote + options that point to the backup host with the backup + catalog. + + + + + Run or + commands with + remote options + on the backup host. + pg_probackup connects to the remote system via SSH and + creates a backup locally or restores the previously taken + backup on the remote system, respectively. + + + + + For example, to create an archive full backup of a + PostgreSQL cluster located on + a remote system with host address 192.168.0.2 + on behalf of the postgres user via SSH connection + through port 2302, run: + + +pg_probackup backup -B backup_dir --instance instance_name -b FULL --remote-user=postgres --remote-host=192.168.0.2 --remote-port=2302 + + + To restore the latest available backup on a remote system with host address + 192.168.0.2 on behalf of the postgres + user via SSH connection through port 2302, run: + + +pg_probackup restore -B backup_dir --instance instance_name --remote-user=postgres --remote-host=192.168.0.2 --remote-port=2302 + + + Restoring an ARCHIVE backup or performing PITR in the remote mode + require additional information: destination address, port and + username for establishing an SSH connection + from the host with database + to the host with the backup + catalog. This information will be used by the + restore_command to copy WAL segments + from the archive to the PostgreSQL pg_wal directory. + + + To solve this problem, you can use + Remote WAL Archive + Options. + + + For example, to restore latest backup on remote system using + remote mode through SSH connection to user + postgres on host with address + 192.168.0.2 via port 2302 + and user backup on backup catalog host with + address 192.168.0.3 via port + 2303, run: + + +pg_probackup restore -B backup_dir --instance instance_name --remote-user=postgres --remote-host=192.168.0.2 --remote-port=2302 --archive-host=192.168.0.3 --archive-port=2303 --archive-user=backup + + + Provided arguments will be used to construct the restore_command: + + +restore_command = 'install_dir/pg_probackup archive-get -B backup_dir --instance instance_name --wal-file-path=%p --wal-file-name=%f --remote-host=192.168.0.3 --remote-port=2303 --remote-user=backup' + + + Alternatively, you can use the + option to provide the entire restore_command: + + +pg_probackup restore -B backup_dir --instance instance_name --remote-user=postgres --remote-host=192.168.0.2 --remote-port=2302 --restore-command='install_dir/pg_probackup archive-get -B backup_dir --instance instance_name --wal-file-path=%p --wal-file-name=%f --remote-host=192.168.0.3 --remote-port=2303 --remote-user=backup' + + + + The remote mode is currently unavailable for + Windows systems. + + + + + Running <application>pg_probackup</application> on Parallel Threads + + , + , + , + , + and + processes can be + executed on several parallel threads. This can significantly + speed up pg_probackup operation given enough resources (CPU + cores, disk, and network bandwidth). + + + Parallel execution is controlled by the + -j/--threads command-line option. 
For + example, to create a backup using four parallel threads, run: + + +pg_probackup backup -B backup_dir --instance instance_name -b FULL -j 4 + + + + Parallel restore applies only to copying data from the + backup catalog to the data directory of the cluster. When + PostgreSQL server is started, WAL records need to be replayed, + and this cannot be done in parallel. + + + + + Configuring <application>pg_probackup</application> + + Once the backup catalog is initialized and a new backup instance + is added, you can use the pg_probackup.conf configuration file + located in the + backup_dir/backups/instance_name + directory to fine-tune pg_probackup configuration. + + + For example, and + commands use a regular + PostgreSQL connection. To avoid specifying + connection options + each time on the command line, you can set them in the + pg_probackup.conf configuration file using the + command. + + + + It is not recommended + to edit pg_probackup.conf manually. + + + + Initially, pg_probackup.conf contains the following settings: + + + + + PGDATA — the path to the data directory of the cluster to + back up. + + + + + system-identifier — the unique identifier of the PostgreSQL + instance. + + + + + Additionally, you can define + remote, + retention, + logging, and + compression settings + using the set-config command: + + +pg_probackup set-config -B backup_dir --instance instance_name +[--external-dirs=external_directory_path] [remote_options] [connection_options] [retention_options] [logging_options] + + + To view the current settings, run the following command: + + +pg_probackup show-config -B backup_dir --instance instance_name + + + You can override the settings defined in pg_probackup.conf when + running pg_probackup commands + via the corresponding environment variables and/or command line + options. + + + + Specifying Connection Settings + + If you define connection settings in the pg_probackup.conf + configuration file, you can omit connection options in all the + subsequent pg_probackup commands. However, if the corresponding + environment variables are set, they get higher priority. The + options provided on the command line overwrite both environment + variables and configuration file settings. + + + If nothing is given, the default values are taken. By default + pg_probackup tries to use local connection via Unix domain + socket (localhost on Windows) and tries to get the database name + and the user name from the PGUSER environment variable or the + current OS user name. + + + + Managing the Backup Catalog + + With pg_probackup, you can manage backups from the command line: + + + + + View backup + information + + + + + View WAL + Archive Information + + + + + Validate backups + + + + + Merge backups + + + + + Delete backups + + + + + Viewing Backup Information + + To view the list of existing backups for every instance, run + the command: + + +pg_probackup show -B backup_dir + + + pg_probackup displays the list of all the available backups. 
+ For example: + + +BACKUP INSTANCE 'node' +====================================================================================================================================== + Instance Version ID Recovery time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status +====================================================================================================================================== + node 10 PYSUE8 2019-10-03 15:51:48+03 FULL ARCHIVE 1/0 16s 9047kB 16MB 4.31 0/12000028 0/12000160 OK + node 10 P7XDQV 2018-04-29 05:32:59+03 DELTA STREAM 1/1 11s 19MB 16MB 1.00 0/15000060 0/15000198 OK + node 10 P7XDJA 2018-04-29 05:28:36+03 PTRACK STREAM 1/1 21s 32MB 32MB 1.00 0/13000028 0/13000198 OK + node 10 P7XDHU 2018-04-29 05:27:59+03 PAGE STREAM 1/1 15s 33MB 16MB 1.00 0/11000028 0/110001D0 OK + node 10 P7XDHB 2018-04-29 05:27:15+03 FULL STREAM 1/0 11s 39MB 16MB 1.00 0/F000028 0/F000198 OK + + + For each backup, the following information is provided: + + + + + Instance — the instance name. + + + + + VersionPostgreSQL major version. + + + + + ID — the backup identifier. + + + + + Recovery time — the earliest moment for which you can + restore the state of the database cluster. + + + + + Mode — the method used to take this backup. Possible + values: FULL, PAGE, DELTA, PTRACK. + + + + + WAL Mode — WAL delivery mode. Possible values: STREAM + and ARCHIVE. + + + + + TLI — timeline identifiers of the current backup and its + parent. + + + + + Time — the time it took to perform the backup. + + + + + Data — the size of the data files in this backup. This + value does not include the size of WAL files. For + STREAM backups, the total size of the backup can be calculated + as Data + WAL. + + + + + WAL — the uncompressed size of WAL files + that need to be applied during recovery for the backup to reach a consistent state. + + + + + Zratio — compression ratio calculated as + uncompressed-bytes / data-bytes. + + + + + Start LSN — WAL log sequence number corresponding to the + start of the backup process. REDO point for PostgreSQL + recovery process to start from. + + + + + Stop LSN — WAL log sequence number corresponding to the + end of the backup process. Consistency point for + PostgreSQL recovery process. + + + + + Status — backup status. Possible values: + + + + + OK — the backup is complete and valid. + + + + + DONE — the backup is complete, but was not validated. + + + + + RUNNING — the backup is in progress. + + + + + MERGING — the backup is being merged. + + + + + MERGED — the backup data files were + successfully merged, but its metadata is in the process + of being updated. Only full backups can have this status. + + + + + DELETING — the backup files are being deleted. + + + + + CORRUPT — some of the backup files are corrupt. + + + + + ERROR — the backup was aborted because of an + unexpected error. + + + + + ORPHAN — the backup is invalid because one of its + parent backups is corrupt or missing. + + + + + + + You can restore the cluster from the backup only if the backup + status is OK or DONE. 
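If you monitor backups from a script rather than by reading this table, the status field can be checked programmatically. A minimal sketch, assuming the JSON output format illustrated later in this section keeps the same instance/backups layout for the full listing and that the third-party jq utility is installed:

pg_probackup show -B backup_dir --format=json | \
  jq -r '.[] | .instance as $inst | .backups[] | select(.status != "OK" and .status != "DONE") | "\($inst) \(.id) \(.status)"'

This prints the instance, ID, and status of every backup that is not currently suitable for restore.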
+ + + To get more detailed information about the backup, run the + show command with the backup ID: + + +pg_probackup show -B backup_dir --instance instance_name -i backup_id + + + The sample output is as follows: + + +#Configuration +backup-mode = FULL +stream = false +compress-alg = zlib +compress-level = 1 +from-replica = false + +#Compatibility +block-size = 8192 +wal-block-size = 8192 +checksum-version = 1 +program-version = 2.1.3 +server-version = 10 + +#Result backup info +timelineid = 1 +start-lsn = 0/04000028 +stop-lsn = 0/040000f8 +start-time = '2017-05-16 12:57:29' +end-time = '2017-05-16 12:57:31' +recovery-xid = 597 +recovery-time = '2017-05-16 12:57:31' +expire-time = '2020-05-16 12:57:31' +data-bytes = 22288792 +wal-bytes = 16777216 +uncompressed-bytes = 39961833 +pgdata-bytes = 39859393 +status = OK +parent-backup-id = 'PT8XFX' +primary_conninfo = 'user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any' + + + Detailed output has additional attributes: + + + + compress-alg — compression algorithm used during backup. Possible values: + zlib, pglz, none. + + + + + compress-level — compression level used during backup. + + + + + from-replica — was this backup taken on standby? Possible values: + 1, 0. + + + + + block-size — the block_size + setting of PostgreSQL cluster at the backup start. + + + + + checksum-version — are + data + block checksums enabled in the backed up PostgreSQL cluster? Possible values: 1, 0. + + + + + program-version — full version of pg_probackup binary used to create the backup. + + + + + start-time — the backup start time. + + + + + end-time — the backup end time. + + + + + expire-time — the point in time + when a pinned backup can be removed in accordance with retention + policy. This attribute is only available for pinned backups. + + + + + uncompressed-bytes — the size of data files before adding page headers and applying + compression. You can evaluate the effectiveness of compression + by comparing uncompressed-bytes to data-bytes if + compression if used. + + + + + pgdata-bytes — the size of PostgreSQL + cluster data files at the time of backup. You can evaluate the + effectiveness of an incremental backup by comparing + pgdata-bytes to uncompressed-bytes. + + + + + recovery-xid — transaction ID at the backup end time. + + + + + parent-backup-id — ID of the parent backup. Available only + for incremental backups. + + + + + primary_conninfolibpq connection parameters + used to connect to the PostgreSQL cluster to take this backup. The + password is not included. + + + + + note — text note attached to backup. + + + + + content-crc — CRC32 checksum of backup_content.control file. + It is used to detect corruption of backup metainformation. 
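As a worked example based on the sample output above: uncompressed-bytes = 39961833 and data-bytes = 22288792, so compression reduced the stored data files roughly 1.8 times (39961833 / 22288792 ≈ 1.79). Comparing pgdata-bytes (39859393) with uncompressed-bytes (39961833) gives a ratio close to 1, which is expected for a FULL backup, since it copies all data files of the cluster.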
+ + + + + + You can also get the detailed information about the backup + in the JSON format: + + +pg_probackup show -B backup_dir --instance instance_name --format=json -i backup_id + + + The sample output is as follows: + + +[ + { + "instance": "node", + "backups": [ + { + "id": "PT91HZ", + "parent-backup-id": "PT8XFX", + "backup-mode": "DELTA", + "wal": "ARCHIVE", + "compress-alg": "zlib", + "compress-level": 1, + "from-replica": false, + "block-size": 8192, + "xlog-block-size": 8192, + "checksum-version": 1, + "program-version": "2.1.3", + "server-version": "10", + "current-tli": 16, + "parent-tli": 2, + "start-lsn": "0/8000028", + "stop-lsn": "0/8000160", + "start-time": "2019-06-17 18:25:11+03", + "end-time": "2019-06-17 18:25:16+03", + "recovery-xid": 0, + "recovery-time": "2019-06-17 18:25:15+03", + "data-bytes": 106733, + "wal-bytes": 16777216, + "primary_conninfo": "user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any", + "status": "OK" + } + ] + } +] + + + + Viewing WAL Archive Information + + To view the information about WAL archive for every instance, + run the command: + + +pg_probackup show -B backup_dir [--instance instance_name] --archive + + + pg_probackup displays the list of all the available WAL files + grouped by timelines. For example: + + +ARCHIVE INSTANCE 'node' +=================================================================================================================================== + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status +=================================================================================================================================== + 5 1 0/B000000 00000005000000000000000B 00000005000000000000000C 2 685kB 48.00 0 OK + 4 3 0/18000000 000000040000000000000018 00000004000000000000001A 3 648kB 77.00 0 OK + 3 2 0/15000000 000000030000000000000015 000000030000000000000017 3 648kB 77.00 0 OK + 2 1 0/B000108 00000002000000000000000B 000000020000000000000015 5 892kB 94.00 1 DEGRADED + 1 0 0/0 000000010000000000000001 00000001000000000000000A 10 8774kB 19.00 1 OK + + + For each timeline, the following information is provided: + + + + + TLI — timeline identifier. + + + + + Parent TLI — identifier of the timeline from which this timeline branched off. + + + + + Switchpoint — LSN of the moment when the timeline branched + off from its parent timeline. + + + + + Min Segno — the first WAL segment + belonging to the timeline. + + + + + Max Segno — the last WAL segment + belonging to the timeline. + + + + + N segments — number of WAL segments belonging to the + timeline. + + + + + Size — the size that files take on disk. + + + + + Zratio — compression ratio calculated as N segments * + wal_segment_size * wal_block_size / Size. + + + + + N backups — number of backups belonging to the timeline. + To get the details about backups, use the JSON format. + + + + + Status — status of the WAL archive for this timeline. Possible + values: + + + + + OK — all WAL segments between Min Segno + and Max Segno are present. + + + + + DEGRADED — some WAL segments between Min Segno + and Max Segno are missing. To find out which files are lost, + view this report in the JSON format. 
+ + + + + + + To get more detailed information about the WAL archive in the JSON + format, run the command: + + +pg_probackup show -B backup_dir [--instance instance_name] --archive --format=json + + + The sample output is as follows: + + +[ + { + "instance": "replica", + "timelines": [ + { + "tli": 5, + "parent-tli": 1, + "switchpoint": "0/B000000", + "min-segno": "00000005000000000000000B", + "max-segno": "00000005000000000000000C", + "n-segments": 2, + "size": 685320, + "zratio": 48.00, + "closest-backup-id": "PXS92O", + "status": "OK", + "lost-segments": [], + "backups": [] + }, + { + "tli": 4, + "parent-tli": 3, + "switchpoint": "0/18000000", + "min-segno": "000000040000000000000018", + "max-segno": "00000004000000000000001A", + "n-segments": 3, + "size": 648625, + "zratio": 77.00, + "closest-backup-id": "PXS9CE", + "status": "OK", + "lost-segments": [], + "backups": [] + }, + { + "tli": 3, + "parent-tli": 2, + "switchpoint": "0/15000000", + "min-segno": "000000030000000000000015", + "max-segno": "000000030000000000000017", + "n-segments": 3, + "size": 648911, + "zratio": 77.00, + "closest-backup-id": "PXS9CE", + "status": "OK", + "lost-segments": [], + "backups": [] + }, + { + "tli": 2, + "parent-tli": 1, + "switchpoint": "0/B000108", + "min-segno": "00000002000000000000000B", + "max-segno": "000000020000000000000015", + "n-segments": 5, + "size": 892173, + "zratio": 94.00, + "closest-backup-id": "PXS92O", + "status": "DEGRADED", + "lost-segments": [ + { + "begin-segno": "00000002000000000000000D", + "end-segno": "00000002000000000000000E" + }, + { + "begin-segno": "000000020000000000000010", + "end-segno": "000000020000000000000012" + } + ], + "backups": [ + { + "id": "PXS9CE", + "backup-mode": "FULL", + "wal": "ARCHIVE", + "compress-alg": "none", + "compress-level": 1, + "from-replica": "false", + "block-size": 8192, + "xlog-block-size": 8192, + "checksum-version": 1, + "program-version": "2.1.5", + "server-version": "10", + "current-tli": 2, + "parent-tli": 0, + "start-lsn": "0/C000028", + "stop-lsn": "0/C000160", + "start-time": "2019-09-13 21:43:26+03", + "end-time": "2019-09-13 21:43:30+03", + "recovery-xid": 0, + "recovery-time": "2019-09-13 21:43:29+03", + "data-bytes": 104674852, + "wal-bytes": 16777216, + "primary_conninfo": "user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any", + "status": "OK" + } + ] + }, + { + "tli": 1, + "parent-tli": 0, + "switchpoint": "0/0", + "min-segno": "000000010000000000000001", + "max-segno": "00000001000000000000000A", + "n-segments": 10, + "size": 8774805, + "zratio": 19.00, + "closest-backup-id": "", + "status": "OK", + "lost-segments": [], + "backups": [ + { + "id": "PXS92O", + "backup-mode": "FULL", + "wal": "ARCHIVE", + "compress-alg": "none", + "compress-level": 1, + "from-replica": "true", + "block-size": 8192, + "xlog-block-size": 8192, + "checksum-version": 1, + "program-version": "2.1.5", + "server-version": "10", + "current-tli": 1, + "parent-tli": 0, + "start-lsn": "0/4000028", + "stop-lsn": "0/6000028", + "start-time": "2019-09-13 21:37:36+03", + "end-time": "2019-09-13 21:38:45+03", + "recovery-xid": 0, + "recovery-time": "2019-09-13 21:37:30+03", + "data-bytes": 25987319, + "wal-bytes": 50331648, + "primary_conninfo": "user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any", + "status": "OK" + } + ] + } + ] + }, + { + "instance": "master", + "timelines": [ + { + "tli": 1, + "parent-tli": 0, + "switchpoint": 
"0/0", + "min-segno": "000000010000000000000001", + "max-segno": "00000001000000000000000B", + "n-segments": 11, + "size": 8860892, + "zratio": 20.00, + "status": "OK", + "lost-segments": [], + "backups": [ + { + "id": "PXS92H", + "parent-backup-id": "PXS92C", + "backup-mode": "PAGE", + "wal": "ARCHIVE", + "compress-alg": "none", + "compress-level": 1, + "from-replica": "false", + "block-size": 8192, + "xlog-block-size": 8192, + "checksum-version": 1, + "program-version": "2.1.5", + "server-version": "10", + "current-tli": 1, + "parent-tli": 1, + "start-lsn": "0/4000028", + "stop-lsn": "0/50000B8", + "start-time": "2019-09-13 21:37:29+03", + "end-time": "2019-09-13 21:37:31+03", + "recovery-xid": 0, + "recovery-time": "2019-09-13 21:37:30+03", + "data-bytes": 1328461, + "wal-bytes": 33554432, + "primary_conninfo": "user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any", + "status": "OK" + }, + { + "id": "PXS92C", + "backup-mode": "FULL", + "wal": "ARCHIVE", + "compress-alg": "none", + "compress-level": 1, + "from-replica": "false", + "block-size": 8192, + "xlog-block-size": 8192, + "checksum-version": 1, + "program-version": "2.1.5", + "server-version": "10", + "current-tli": 1, + "parent-tli": 0, + "start-lsn": "0/2000028", + "stop-lsn": "0/2000160", + "start-time": "2019-09-13 21:37:24+03", + "end-time": "2019-09-13 21:37:29+03", + "recovery-xid": 0, + "recovery-time": "2019-09-13 21:37:28+03", + "data-bytes": 24871902, + "wal-bytes": 16777216, + "primary_conninfo": "user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmode=disable sslcompression=1 target_session_attrs=any", + "status": "OK" + } + ] + } + ] + } +] + + + Most fields are consistent with the plain format, with some + exceptions: + + + + + The size is in bytes. + + + + + The closest-backup-id attribute + contains the ID of the most recent valid backup that belongs to + one of the previous timelines. You can use this backup to perform + point-in-time recovery to this timeline. If + such a backup does not exist, this string is empty. + + + + + The lost-segments array provides with + information about intervals of missing segments in DEGRADED timelines. In OK + timelines, the lost-segments array is empty. + + + + + The backups array lists all backups + belonging to the timeline. If the timeline has no backups, this array is empty. + + + + + + + Configuring Retention Policy + + With pg_probackup, you can configure + retention policy to remove redundant backups, clean up unneeded + WAL files, as well as pin specific backups to ensure they are + kept for the specified time, as explained in the sections below. + All these actions can be combined together in any way. + + + + Removing Redundant Backups + + By default, all backup copies created with pg_probackup are + stored in the specified backup catalog. To save disk space, + you can configure retention policy to remove redundant backup copies. + + + To configure retention policy, set one or more of the + following variables in the pg_probackup.conf file via + : + + +--retention-redundancy=redundancy + + + Specifies the number of full backup + copies to keep in the backup catalog. + + +--retention-window=window + + + Defines the earliest point in time for which pg_probackup can + complete the recovery. This option is set in + the number of days from the + current moment. 
For example, if + retention-window=7, pg_probackup must + keep at least one backup copy that is older than seven days, with + all the corresponding WAL files, and all the backups that follow. + + + If both and + options are set, both these + conditions have to be taken into account when purging the backup + catalog. For example, if you set --retention-redundancy=2 + and --retention-window=7, + pg_probackup has to keep two full backup + copies, as well as all the backups required to ensure recoverability + for the last seven days: + + +pg_probackup set-config -B backup_dir --instance instance_name --retention-redundancy=2 --retention-window=7 + + + + To clean up the backup catalog in accordance with retention policy, + you have to run the command with + retention flags, as shown + below, or use the command with + these flags to process the outdated backup copies right when the new + backup is created. + + + + For example, to remove all backup copies that no longer satisfy the + defined retention policy, run the following command with the + --delete-expired flag: + + +pg_probackup delete -B backup_dir --instance instance_name --delete-expired + + + If you would like to also remove the WAL files that are no + longer required for any of the backups, you should also specify the + flag: + + +pg_probackup delete -B backup_dir --instance instance_name --delete-expired --delete-wal + + + + You can also set or override the current retention policy by + specifying and + options directly when + running delete or backup + commands: + + +pg_probackup delete -B backup_dir --instance instance_name --delete-expired --retention-window=7 --retention-redundancy=2 + + + Since incremental backups require that their parent full + backup and all the preceding incremental backups are + available, if any of such backups expire, they still cannot be + removed while at least one incremental backup in this chain + satisfies the retention policy. To avoid keeping expired + backups that are still required to restore an active + incremental one, you can merge them with this backup using the + flag when running + or + commands. 
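Retention flags are typically combined with regularly scheduled backups so that the catalog is cleaned up automatically. A minimal sketch of such a schedule (the schedule, paths, OS user, and instance name below are hypothetical):

# /etc/cron.d/pg_probackup_retention: nightly DELTA backup that also enforces retention
30 2 * * * backup pg_probackup backup -B /mnt/backups --instance node -b DELTA --delete-expired --merge-expired --delete-wal

With such a schedule, outdated backup copies are merged or removed right after each new backup is taken, as described above.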
+ + + + Suppose you have backed up the node + instance in the backup_dir directory, + with the option set + to 7, and you have the following backups + available on April 10, 2019: + + +BACKUP INSTANCE 'node' +=================================================================================================================================== + Instance Version ID Recovery time Mode WAL TLI Time Data WAL Zratio Start LSN Stop LSN Status +=================================================================================================================================== + node 10 P7XDHR 2019-04-10 05:27:15+03 FULL STREAM 1/0 11s 200MB 16MB 1.0 0/18000059 0/18000197 OK + node 10 P7XDQV 2019-04-08 05:32:59+03 PAGE STREAM 1/0 11s 19MB 16MB 1.0 0/15000060 0/15000198 OK + node 10 P7XDJA 2019-04-03 05:28:36+03 DELTA STREAM 1/0 21s 32MB 16MB 1.0 0/13000028 0/13000198 OK + -------------------------------------------------------retention window-------------------------------------------------------- + node 10 P7XDHU 2019-04-02 05:27:59+03 PAGE STREAM 1/0 31s 33MB 16MB 1.0 0/11000028 0/110001D0 OK + node 10 P7XDHB 2019-04-01 05:27:15+03 FULL STREAM 1/0 11s 200MB 16MB 1.0 0/F000028 0/F000198 OK + node 10 P7XDFT 2019-03-29 05:26:25+03 FULL STREAM 1/0 11s 200MB 16MB 1.0 0/D000028 0/D000198 OK + + + Even though P7XDHB and P7XDHU backups are outside the + retention window, they cannot be removed as it invalidates the + succeeding incremental backups P7XDJA and P7XDQV that are + still required, so, if you run the + command with the + flag, only the P7XDFT full + backup will be removed. + + + With the option, the P7XDJA + backup is merged with the underlying P7XDHU and P7XDHB backups + and becomes a full one, so there is no need to keep these + expired backups anymore: + + +pg_probackup delete -B backup_dir --instance node --delete-expired --merge-expired +pg_probackup show -B backup_dir + + +BACKUP INSTANCE 'node' +================================================================================================================================== + Instance Version ID Recovery time Mode WAL TLI Time Data WAL Zratio Start LSN Stop LSN Status +================================================================================================================================== + node 10 P7XDHR 2019-04-10 05:27:15+03 FULL STREAM 1/0 11s 200MB 16MB 1.0 0/18000059 0/18000197 OK + node 10 P7XDQV 2019-04-08 05:32:59+03 PAGE STREAM 1/0 11s 19MB 16MB 1.0 0/15000060 0/15000198 OK + node 10 P7XDJA 2019-04-03 05:28:36+03 FULL STREAM 1/0 21s 32MB 16MB 1.0 0/13000028 0/13000198 OK + + + The Time field for the merged backup displays the time + required for the merge. + + + + + Pinning Backups + + If you need to keep certain backups longer than the + established retention policy allows, you can pin them + for arbitrary time. For example: + + +pg_probackup set-backup -B backup_dir --instance instance_name -i backup_id --ttl=30d + + + This command sets the expiration time of the + specified backup to 30 days starting from the time + indicated in its recovery-time attribute. + + + You can also explicitly set the expiration time for a backup + using the option. 
For example: + + +pg_probackup set-backup -B backup_dir --instance instance_name -i backup_id --expire-time='2020-01-01 00:00:00+03' + + + Alternatively, you can use the and + options with the + command to pin the newly + created backup: + + +pg_probackup backup -B backup_dir --instance instance_name -b FULL --ttl=30d +pg_probackup backup -B backup_dir --instance instance_name -b FULL --expire-time='2020-01-01 00:00:00+03' + + + To check if the backup is pinned, + run the command: + +pg_probackup show -B backup_dir --instance instance_name -i backup_id + + + + If the backup is pinned, it has the expire-time + attribute that displays its expiration time: + +... +recovery-time = '2017-05-16 12:57:31' +expire-time = '2020-01-01 00:00:00+03' +data-bytes = 22288792 +... + + + + You can unpin the backup by setting the option to zero: + + +pg_probackup set-backup -B backup_dir --instance instance_name -i backup_id --ttl=0 + + + + + A pinned incremental backup implicitly pins all + its parent backups. If you unpin such a backup later, + its implicitly pinned parents will also be automatically unpinned. + + + + + + Configuring WAL Archive Retention Policy + + When continuous + WAL archiving is enabled, archived WAL segments can take a lot + of disk space. Even if you delete old backup copies from time to time, + the --delete-wal flag can + purge only those WAL segments that do not apply to any of the + remaining backups in the backup catalog. However, if point-in-time + recovery is critical only for the most recent backups, you can + configure WAL archive retention policy to keep WAL archive of + limited depth and win back some more disk space. + + + + To configure WAL archive retention policy, you have to run the + command with the + --wal-depth option that specifies the number + of backups that can be used for PITR. + This setting applies to all the timelines, so you should be able to perform + PITR for the same number of backups on each timeline, if available. + Pinned backups are + not included into this count: if one of the latest backups + is pinned, pg_probackup ensures that + PITR is possible for one extra backup. + + + + To remove WAL segments that do not satisfy the defined WAL archive + retention policy, you simply have to run the + or command with the --delete-wal + flag. For archive backups, WAL segments between Start LSN + and Stop LSN are always kept intact, so such backups + remain valid regardless of the --wal-depth setting + and can still be restored, if required. + + + + You can also use the option + with the and + commands to override the previously defined WAL archive retention + policy and purge old WAL segments on the fly. 
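If you prefer to store the WAL depth once instead of passing it on every run, you can persist it in pg_probackup.conf and then verify the result, for example with a depth of 1:

pg_probackup set-config -B backup_dir --instance instance_name --wal-depth=1
pg_probackup show-config -B backup_dir --instance instance_name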
+ + + + Suppose you have backed up the node + instance in the backup_dir directory and + configured + continuous WAL + archiving: + + +pg_probackup show -B backup_dir --instance node + + +BACKUP INSTANCE 'node' +==================================================================================================================================== + Instance Version ID Recovery Time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status +==================================================================================================================================== + node 11 PZ9442 2019-10-12 10:43:21+03 DELTA STREAM 1/0 10s 121kB 16MB 1.00 0/46000028 0/46000160 OK + node 11 PZ943L 2019-10-12 10:43:04+03 FULL STREAM 1/0 10s 180MB 32MB 1.00 0/44000028 0/44000160 OK + node 11 PZ7YR5 2019-10-11 19:49:56+03 DELTA STREAM 1/1 10s 112kB 32MB 1.00 0/41000028 0/41000160 OK + node 11 PZ7YMP 2019-10-11 19:47:16+03 DELTA STREAM 1/1 10s 376kB 32MB 1.00 0/3E000028 0/3F0000B8 OK + node 11 PZ7YK2 2019-10-11 19:45:45+03 FULL STREAM 1/0 11s 180MB 16MB 1.00 0/3C000028 0/3C000198 OK + node 11 PZ7YFO 2019-10-11 19:43:04+03 FULL STREAM 1/0 10s 30MB 16MB 1.00 0/2000028 0/200ADD8 OK + + + You can check the state of the WAL archive by running the + command with the + flag: + + +pg_probackup show -B backup_dir --instance node --archive + + +ARCHIVE INSTANCE 'node' +=============================================================================================================================== + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status +=============================================================================================================================== + 1 0 0/0 000000010000000000000001 000000010000000000000047 71 36MB 31.00 6 OK + + + WAL purge without cannot + achieve much, only one segment is removed: + + +pg_probackup delete -B backup_dir --instance node --delete-wal + + +ARCHIVE INSTANCE 'node' +=============================================================================================================================== + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status +=============================================================================================================================== + 1 0 0/0 000000010000000000000002 000000010000000000000047 70 34MB 32.00 6 OK + + + If you would like, for example, to keep only those WAL + segments that can be applied to the latest valid backup, set the + option to 1: + + +pg_probackup delete -B backup_dir --instance node --delete-wal --wal-depth=1 + + +ARCHIVE INSTANCE 'node' +================================================================================================================================ + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status +================================================================================================================================ + 1 0 0/0 000000010000000000000046 000000010000000000000047 2 143kB 228.00 6 OK + + + Alternatively, you can use the + option with the command: + + +pg_probackup backup -B backup_dir --instance node -b DELTA --wal-depth=1 --delete-wal + + +ARCHIVE INSTANCE 'node' +=============================================================================================================================== + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status 
+=============================================================================================================================== + 1 0 0/0 000000010000000000000048 000000010000000000000049 1 72kB 228.00 7 OK + + + + + Merging Backups + + As you take more and more incremental backups, the total size of + the backup catalog can substantially grow. To save disk space, + you can merge incremental backups to their parent full backup by + running the merge command, specifying the backup ID of the most + recent incremental backup you would like to merge: + + +pg_probackup merge -B backup_dir --instance instance_name -i backup_id + + + This command merges backups that belong to a common incremental backup + chain. If you specify a full backup, it will be merged with its first + incremental backup. If you specify an incremental backup, it will be + merged to its parent full backup, together with all incremental backups + between them. Once the merge is complete, the full backup takes in all + the merged data, and the incremental backups are removed as redundant. + Thus, the merge operation is virtually equivalent to retaking a full + backup and removing all the outdated backups, but it allows to save much + time, especially for large data volumes, as well as I/O and network + traffic if you are using pg_probackup in the + remote mode. + + + Before the merge, pg_probackup validates all the affected + backups to ensure that they are valid. You can check the current + backup status by running the + command with the backup ID: + + +pg_probackup show -B backup_dir --instance instance_name -i backup_id + + + If the merge is still in progress, the backup status is + displayed as MERGING. For full backups, + it can also be shown as MERGED while the + metadata is being updated at the final stage of the merge. + The merge is idempotent, so you can + restart the merge if it was interrupted. + + + + Deleting Backups + + To delete a backup that is no longer required, run the following + command: + + +pg_probackup delete -B backup_dir --instance instance_name -i backup_id + + + This command will delete the backup with the specified + backup_id, together with all the + incremental backups that descend from + backup_id, if any. This way you can delete + some recent incremental backups, retaining the underlying full + backup and some of the incremental backups that follow it. + + + To delete obsolete WAL files that are not necessary to restore + any of the remaining backups, use the + flag: + + +pg_probackup delete -B backup_dir --instance instance_name --delete-wal + + + To delete backups that are expired according to the current + retention policy, use the + flag: + + +pg_probackup delete -B backup_dir --instance instance_name --delete-expired + + + Expired backups cannot be removed while at least one + incremental backup that satisfies the retention policy is based + on them. If you would like to minimize the number of backups + still required to keep incremental backups valid, specify the + flag when running this + command: + + +pg_probackup delete -B backup_dir --instance instance_name --delete-expired --merge-expired + + + In this case, pg_probackup searches for the oldest incremental + backup that satisfies the retention policy and merges this + backup with the underlying full and incremental backups that + have already expired, thus making it a full backup. Once the + merge is complete, the remaining expired backups are deleted. 
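+
+ If needed, you can also override the configured retention policy on
+ the fly while merging and purging expired backups. The following is
+ only a sketch: the retention values are illustrative, and the flags
+ are combined as allowed by the delete command synopsis.
+
+pg_probackup delete -B backup_dir --instance instance_name --delete-expired --merge-expired --retention-window=7 --retention-redundancy=2
+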
+ + + Before merging or deleting backups, you can run the + delete command with the + flag, which displays the status of + all the available backups according to the current retention + policy, without performing any irreversible actions. + + + To delete all backups with specific status, use the : + + +pg_probackup delete -B backup_dir --instance instance_name --status=ERROR + + + + Deleting backups by status ignores established retention policies. + + + + + + +Command-Line Reference + + Commands + + This section describes pg_probackup commands. + Optional parameters are enclosed in square brackets. For detailed + parameter descriptions, see the section Options. + + + version + +pg_probackup version + + + Prints pg_probackup version. + + + + help + +pg_probackup help [command] + + + Displays the synopsis of pg_probackup commands. If one of the + pg_probackup commands is specified, shows detailed information + about the options that can be used with this command. + + + + init + +pg_probackup init -B backup_dir [--help] + + + Initializes the backup catalog in + backup_dir that will store backup copies, + WAL archive, and meta information for the backed up database + clusters. If the specified backup_dir + already exists, it must be empty. Otherwise, pg_probackup + displays a corresponding error message. + + + For details, see the section + Initializing + the Backup Catalog. + + + + add-instance + +pg_probackup add-instance -B backup_dir -D data_dir --instance instance_name [--help] + + + Initializes a new backup instance inside the backup catalog + backup_dir and generates the + pg_probackup.conf configuration file that controls + pg_probackup settings for the cluster with the specified + data_dir data directory. + + + For details, see the section + Adding a New + Backup Instance. + + + + del-instance + +pg_probackup del-instance -B backup_dir --instance instance_name [--help] + + + Deletes all backups and WAL files associated with the + specified instance. + + + + set-config + +pg_probackup set-config -B backup_dir --instance instance_name +[--help] [--pgdata=pgdata-path] +[--retention-redundancy=redundancy][--retention-window=window][--wal-depth=wal_depth] +[--compress-algorithm=compression_algorithm] [--compress-level=compression_level] +[-d dbname] [-h host] [-p port] [-U username] +[--archive-timeout=timeout] [--external-dirs=external_directory_path] +[--restore-command=cmdline] +[remote_options] [remote_wal_archive_options] [logging_options] + + + Adds the specified connection, compression, retention, logging, + and external directory settings into the pg_probackup.conf + configuration file, or modifies the previously defined values. + + + For all available settings, see the + Options section. + + + It is not recommended to + edit pg_probackup.conf manually. + + + + set-backup + +pg_probackup set-backup -B backup_dir --instance instance_name -i backup_id +{--ttl=ttl | --expire-time=time} +[--note=backup_note] [--help] + + + Sets the provided backup-specific settings into the + backup.control configuration file, or modifies the previously + defined values. + + + + + + + Sets the text note for backup copy. + If backup_note contain newline characters, + then only substring before first newline character will be saved. + Max size of text note is 1 KB. + The 'none' value removes current note. + + + + + + For all available pinning settings, see the section + Pinning Options. 
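+
+ For example, the following sketch pins a backup for 30 days and
+ attaches a note to it (the note text is illustrative; passing
+ --note=none removes an existing note):
+
+pg_probackup set-backup -B backup_dir --instance instance_name -i backup_id --ttl=30d --note='full backup before schema migration'
+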
+ + + + show-config + +pg_probackup show-config -B backup_dir --instance instance_name [--format=plain|json] + + + Displays the contents of the pg_probackup.conf configuration + file located in the + backup_dir/backups/instance_name + directory. You can specify the + --format=json option to get the result + in the JSON format. By default, configuration settings are + shown as plain text. + + + To edit pg_probackup.conf, use the + command. + + + + show + +pg_probackup show -B backup_dir +[--help] [--instance instance_name [-i backup_id | --archive]] [--format=plain|json] + + + Shows the contents of the backup catalog. If + instance_name and + backup_id are specified, shows detailed + information about this backup. If the option is + specified, shows the contents of WAL archive of the backup + catalog. + + + By default, the contents of the backup catalog is shown as + plain text. You can specify the + --format=json option to get the result + in the JSON format. + + + For details on usage, see the sections + Managing the + Backup Catalog and + Viewing WAL + Archive Information. + + + + backup + +pg_probackup backup -B backup_dir -b backup_mode --instance instance_name +[--help] [-j num_threads] [--progress] +[-C] [--stream [-S slot_name] [--temp-slot]] [--backup-pg-log] +[--no-validate] [--skip-block-validation] +[-w --no-password] [-W --password] +[--archive-timeout=timeout] [--external-dirs=external_directory_path] +[--no-sync] [--note=backup_note] +[connection_options] [compression_options] [remote_options] +[retention_options] [pinning_options] [logging_options] + + + Creates a backup copy of the PostgreSQL instance. + + + + + + + + Specifies the backup mode to use. Possible values are: + + + + + FULL — creates a full backup that contains all the data + files of the cluster to be restored. + + + + + DELTA — reads all data files in the data directory and + creates an incremental backup for pages that have changed + since the previous backup. + + + + + PAGE — creates an incremental PAGE backup based on the WAL + files that have changed since the previous full or + incremental backup was taken. + + + + + PTRACK — creates an incremental PTRACK backup tracking + page changes on the fly. + + + + + + + + + + + + + Spreads out the checkpoint over a period of time. By default, + pg_probackup tries to complete the checkpoint as soon as + possible. + + + + + + + + + Makes a STREAM backup, which + includes all the necessary WAL files by streaming them from + the database server via replication protocol. + + + + + + + + + Creates a temporary physical replication slot for streaming + WAL from the backed up PostgreSQL instance. It ensures that + all the required WAL segments remain available if WAL is + rotated while the backup is in progress. This flag can only be + used together with the flag. + The default slot name is pg_probackup_slot, + which can be changed using the / option. + + + + + + + + + + Specifies the replication slot for WAL streaming. This option + can only be used together with the + flag. + + + + + + + + + Includes the log directory into the backup. This directory + usually contains log messages. By default, log directory is + excluded. + + + + + + + + + + Includes the specified directory into the backup by recursively + copying its contents into a separate subdirectory in the backup catalog. This option + is useful to back up scripts, SQL dump files, and configuration + files located outside of the data directory. 
If you would like + to back up several external directories, separate their paths + by a colon on Unix and a semicolon on Windows. + + + + + + + + + Sets the timeout for WAL segment archiving and + streaming, in seconds. By default, pg_probackup waits 300 seconds. + + + + + + + + + Disables block-level checksum verification to speed up + the backup process. + + + + + + + + + Skips automatic validation after the backup is taken. You can + use this flag if you validate backups regularly and would like + to save time when running backup operations. + + + + + + + + + Do not sync backed up files to disk. You can use this flag to speed + up the backup process. Using this flag can result in data + corruption in case of operating system or hardware crash. + If you use this option, it is recommended to run the + command once the backup is complete + to detect possible issues. + + + + + + + + Sets the text note for backup copy. + If backup_note contain newline characters, + then only substring before first newline character will be saved. + Max size of text note is 1 KB. + The 'none' value removes current note. + + + + + + + + + Additionally, connection + options, retention + options, pinning + options, remote + mode options, + compression + options, logging + options, and common + options can be used. + + + For details on usage, see the section + Creating a Backup. + + + + restore + +pg_probackup restore -B backup_dir --instance instance_name +[--help] [-D data_dir] [-i backup_id] +[-j num_threads] [--progress] +[-T OLDDIR=NEWDIR] [--external-mapping=OLDDIR=NEWDIR] [--skip-external-dirs] +[-R | --restore-as-replica] [--no-validate] [--skip-block-validation] +[--force] [--no-sync] +[--restore-command=cmdline] +[--primary-conninfo=primary_conninfo] +[-S | --primary-slot-name=slot_name] +[recovery_target_options] [logging_options] [remote_options] +[partial_restore_options] [remote_wal_archive_options] + + + Restores the PostgreSQL instance from a backup copy located in + the backup_dir backup catalog. If you + specify a recovery + target option, pg_probackup finds the closest + backup and restores it to the specified recovery target. + If neither the backup ID nor recovery target options are provided, + pg_probackup uses the most recent backup + to perform the recovery. + + + + + + + + + Creates a minimal recovery configuration file to facilitate setting up a + standby server. If the replication connection requires a password, + you must specify the password manually in the + primary_conninfo + parameter as it is not included. + For PostgreSQL 11 or lower, + recovery settings are written into the recovery.conf + file. Starting from PostgreSQL 12, + pg_probackup writes these settings into + the probackup_recovery.conf file in the data + directory, and then includes them into the + postgresql.auto.conf when the cluster is + is started. + + + + + + + + + Sets the + primary_conninfo + parameter to the specified value. + This option will be ignored unless the flag is specified. + + + Example: --primary-conninfo='host=192.168.1.50 port=5432 user=foo password=foopass' + + + + + + + + + + Sets the + primary_slot_name + parameter to the specified value. + This option will be ignored unless the flag is specified. + + + + + + + + + + Relocates the tablespace from the OLDDIR to the NEWDIR + directory at the time of recovery. Both OLDDIR and NEWDIR must + be absolute paths. If the path contains the equals sign (=), + escape it with a backslash. 
This option can be specified + multiple times for multiple tablespaces. + + + + + + + + + Relocates an external directory included into the backup from + the OLDDIR to the NEWDIR directory at the time of recovery. + Both OLDDIR and NEWDIR must be absolute paths. If the path + contains the equals sign (=), escape it with a backslash. This + option can be specified multiple times for multiple + directories. + + + + + + + + + Skip external directories included into the backup with the + option. The contents of + these directories will not be restored. + + + + + + + + + Disables block-level checksum verification to speed up + validation. During automatic validation before the restore only + file-level checksums will be verified. + + + + + + + + + Skips backup validation. You can use this flag if you validate + backups regularly and would like to save time when running + restore operations. + + + + + + + + + Sets the + restore_command + parameter to the specified command. For example: + --restore-command='cp /mnt/server/archivedir/%f "%p"' + + + + + + + + + Allows to ignore an invalid status of the backup. You can use + this flag if you need to restore the + PostgreSQL cluster from a corrupt or an invalid backup. + Use with caution. + + + + + + + + + Do not sync restored files to disk. You can use this flag to speed + up restore process. Using this flag can result in data + corruption in case of operating system or hardware crash. + If it happens, you have to run the + command again. + + + + + + + Additionally, recovery + target options, + remote mode + options, + remote WAL archive + options, logging + options, partial + restore options, and common + options can be used. + + + For details on usage, see the section + Restoring a + Cluster. + + + + checkdb + +pg_probackup checkdb +[-B backup_dir] [--instance instance_name] [-D data_dir] +[--help] [-j num_threads] [--progress] +[--skip-block-validation] [--amcheck] [--heapallindexed] +[connection_options] [logging_options] + + + Verifies the PostgreSQL database cluster correctness by + detecting physical and logical corruption. + + + + + + + + Performs logical verification of indexes for the specified + PostgreSQL instance if no corruption was found while checking + data files. You must have the amcheck + extension or the amcheck_next extension + installed in the database to check its indexes. For databases + without amcheck, index verification will be skipped. + + + + + + + + + Skip validation of data files. You can use this flag only + together with the flag, so that only logical + verification of indexes is performed. + + + + + + + + + Checks that all heap tuples that should be indexed are + actually indexed. You can use this flag only together with the + flag. + + + This check is only possible if you are using the + amcheck extension of version 2.0 or higher, or + the amcheck_next extension of any version. + + + + + + + Additionally, connection + options and logging + options can be used. + + + For details on usage, see the section + Verifying a + Cluster. + + + + validate + +pg_probackup validate -B backup_dir +[--help] [--instance instance_name] [-i backup_id] +[-j num_threads] [--progress] +[--skip-block-validation] +[recovery_target_options] [logging_options] + + + Verifies that all the files required to restore the cluster + are present and are not corrupt. If + instance_name is not specified, + pg_probackup validates all backups available in the backup + catalog. 
If you specify the instance_name + without any additional options, pg_probackup validates all the + backups available for this backup instance. If you specify the + instance_name with a + recovery target + option and/or a backup_id, + pg_probackup checks whether it is possible to restore the + cluster using these options. + + + For details, see the section + Validating a + Backup. + + + + merge + +pg_probackup merge -B backup_dir --instance instance_name -i backup_id +[--help] [-j num_threads] [--progress] +[logging_options] + + + Merges backups that belong to a common incremental backup + chain. If you specify a full backup, it will be merged with its first + incremental backup. If you specify an incremental backup, it will be + merged to its parent full backup, together with all incremental backups + between them. Once the merge is complete, the full backup takes in all + the merged data, and the incremental backups are removed as redundant. + + + For details, see the section + Merging Backups. + + + + delete + +pg_probackup delete -B backup_dir --instance instance_name +[--help] [-j num_threads] [--progress] +[--retention-redundancy=redundancy][--retention-window=window][--wal-depth=wal_depth] [--delete-wal] +{-i backup_id | --delete-expired [--merge-expired] | --merge-expired | --status=backup_status} +[--dry-run] [logging_options] + + + Deletes backup with specified backup_id + or launches the retention purge of backups and archived WAL + that do not satisfy the current retention policies. + + + For details, see the sections + Deleting Backups, + Retention Options and + Configuring + Retention Policy. + + + + archive-push + +pg_probackup archive-push -B backup_dir --instance instance_name +--wal-file-name=wal_file_name [--wal-file-path=wal_file_path] +[--help] [--no-sync] [--compress] [--no-ready-rename] [--overwrite] +[-j num_threads] [--batch-size=batch_size] +[--archive-timeout=timeout] +[--compress-algorithm=compression_algorithm] +[--compress-level=compression_level] +[remote_options] [logging_options] + + + Copies WAL files into the corresponding subdirectory of the + backup catalog and validates the backup instance by + instance_name and + system-identifier. If parameters of the + backup instance and the cluster do not match, this command + fails with the following error message: Refuse to push WAL + segment segment_name into archive. Instance parameters + mismatch. + + + If the files to be copied already exists in the backup catalog, + pg_probackup computes and compares their checksums. If the + checksums match, archive-push skips the corresponding file and + returns a successful execution code. Otherwise, archive-push + fails with an error. If you would like to replace WAL files in + the case of checksum mismatch, run the archive-push command + with the flag. + + + Each file is copied to a temporary file with the + .part suffix. If the temporary file already + exists, pg_probackup will wait + seconds before discarding it. + After the copy is done, atomic rename is performed. + This algorithm ensures that a failed archive-push + will not stall continuous archiving and that concurrent archiving from + multiple sources into a single WAL archive has no risk of archive + corruption. + + + To speed up archiving, you can specify the option + to copy WAL segments in batches of the specified size. + If option is used, then you can also specify + the option to copy the batch of WAL segments on multiple threads. 
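+
+ For example, a sketch of an archive-push invocation that copies WAL
+ segments in batches using multiple threads (the batch size and thread
+ count are illustrative; %f is the placeholder that PostgreSQL
+ substitutes with the WAL file name when the command is run from
+ archive_command):
+
+pg_probackup archive-push -B backup_dir --instance instance_name --wal-file-name=%f --batch-size=10 -j 4
+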
+ + + WAL segments copied to the archive are synced to disk unless + the flag is used. + + + You can use archive-push in the + archive_command + PostgreSQL parameter to set up + continuous + WAL archiving. + + + For details, see sections + Archiving Options and + Compression + Options. + + + + archive-get + +pg_probackup archive-get -B backup_dir --instance instance_name --wal-file-path=wal_file_path --wal-file-name=wal_file_name +[-j num_threads] [--batch-size=batch_size] +[--prefetch-dir=prefetch_dir_path] [--no-validate-wal] +[--help] [remote_options] [logging_options] + + + Copies WAL files from the corresponding subdirectory of the + backup catalog to the cluster's write-ahead log location. This + command is automatically set by pg_probackup as part of the + restore_command when + restoring backups using a WAL archive. You do not need to set + it manually. + + + + To speed up recovery, you can specify the option + to copy WAL segments in batches of the specified size. + If option is used, then you can also specify + the option to copy the batch of WAL segments on multiple threads. + + + + For details, see section Archiving Options. + + + + + Options + + This section describes command-line options for pg_probackup + commands. If the option value can be derived from an environment + variable, this variable is specified below the command-line + option, in the uppercase. Some values can be taken from the + pg_probackup.conf configuration file located in the backup + catalog. + + + For details, see . + + + If an option is specified using more than one method, + command-line input has the highest priority, while the + pg_probackup.conf settings have the lowest priority. + + + Common Options + + The list of general options. + + + + + + +BACKUP_PATH + + + Specifies the absolute path to the backup catalog. Backup + catalog is a directory where all backup files and meta + information are stored. Since this option is required for most + of the pg_probackup commands, you are recommended to specify + it once in the BACKUP_PATH environment variable. In this case, + you do not need to use this option each time on the command + line. + + + + + + + +PGDATA + + + Specifies the absolute path to the data directory of the + database cluster. This option is mandatory only for the + command. + Other commands can take its value from the PGDATA environment + variable, or from the pg_probackup.conf configuration file. + + + + + + + + + + Specifies the unique identifier of the backup. + + + + + + + + + + Sets the number of parallel threads for backup, + restore, merge, + validate, checkdb, and + archive-push processes. + + + + + + + + + Shows the progress of operations. + + + + + + + + + Shows detailed information about the options that can be used + with this command. + + + + + + + + Recovery Target Options + + If + continuous + WAL archiving is configured, you can use one of these + options together with + or commands to + specify the moment up to which the database cluster must be + restored or validated. + + + + + + + + Defines when to stop the recovery: + + + + The immediate value stops the recovery + after reaching the consistent state of the specified + backup, or the latest available backup if the + / option is omitted. + This is the default behavior for STREAM backups. + + + + + The latest value continues the recovery + until all WAL segments available in the archive are + applied. This is the default behavior for ARCHIVE backups. 
+ + + + + + + + + + + + Specifies a particular timeline to be used for recovery. + By default, the timeline of the specified backup is used. + + + + + + + + + Specifies the LSN of the write-ahead log location up to which + recovery will proceed. Can be used only when restoring + a database cluster of major version 10 or higher. + + + + + + + + + Specifies a named savepoint up to which to restore the cluster. + + + + + + + + + Specifies the timestamp up to which recovery will proceed. + + + + + + + + + Specifies the transaction ID up to which recovery will + proceed. + + + + + + + + + + Specifies whether to stop just after the specified recovery + target (true), or just before the recovery target (false). + This option can only be used together with + , + , + or + options. The default + depends on the + recovery_target_inclusive + parameter. + + + + + + + + + Specifies + the + action the server should take when the recovery target + is reached. + + + Default: pause + + + + + + + + Retention Options + + You can use these options together with + and + commands. + + + For details on configuring retention policy, see the section + Configuring + Retention Policy. + + + + + + + + + + Specifies the number of full backup copies to keep in the data + directory. Must be a non-negative integer. The zero value disables + this setting. + + + Default: 0 + + + + + + + + + + Number of days of recoverability. Must be a non-negative integer. + The zero value disables this setting. + + + Default: 0 + + + + + + + + + Number of latest valid backups on every timeline that must + retain the ability to perform PITR. Must be a non-negative + integer. The zero value disables this setting. + + + Default: 0 + + + + + + + + + Deletes WAL files that are no longer required to restore the + cluster from any of the existing backups. + + + + + + + + + Deletes backups that do not conform to the retention policy + defined in the pg_probackup.conf configuration file. + + + + + + + + + + Merges the oldest incremental backup that satisfies the + requirements of retention policy with its parent backups that + have already expired. + + + + + + + + + Displays the current status of all the available backups, + without deleting or merging expired backups, if any. + + + + + + + + Pinning Options + + You can use these options together with + and + commands. + + + For details on backup pinning, see the section + Backup Pinning. + + + + + + + + Specifies the amount of time the backup should be pinned. + Must be a non-negative integer. The zero value unpins the already + pinned backup. Supported units: ms, s, min, h, d (s by + default). + + + Example: --ttl=30d + + + + + + + + + Specifies the timestamp up to which the backup will stay + pinned. Must be an ISO-8601 complaint timestamp. + + + Example: --expire-time='2020-01-01 00:00:00+03' + + + + + + + + Logging Options + + You can use these options with any command. + + + + + + + + Controls which message levels are sent to the console log. + Valid values are verbose, + log, info, + warning, error and + off. Each level includes all the levels + that follow it. The later the level, the fewer messages are + sent. The off level disables console + logging. + + + Default: info + + + + All console log messages are going to stderr, so + the output of and + commands does + not mingle with log messages. + + + + + + + + + + Controls which message levels are sent to a log file. Valid + values are verbose, log, + info, warning, + error, and off. Each + level includes all the levels that follow it. 
The later the + level, the fewer messages are sent. The off + level disables file logging. + + + Default: off + + + + + + + + + Defines the filenames of the created log files. The filenames + are treated as a strftime pattern, so you can use %-escapes to + specify time-varying filenames. + + + Default: pg_probackup.log + + + For example, if you specify the pg_probackup-%u.log pattern, + pg_probackup generates a separate log file for each day of the + week, with %u replaced by the corresponding decimal number: + pg_probackup-1.log for Monday, pg_probackup-2.log for Tuesday, + and so on. + + + This option takes effect if file logging is enabled by the + option. + + + + + + + + + Defines the filenames of log files for error messages only. + The filenames are treated as a strftime pattern, so you can + use %-escapes to specify time-varying filenames. + + + Default: none + + + For example, if you specify the error-pg_probackup-%u.log + pattern, pg_probackup generates a separate log file for each + day of the week, with %u replaced by the corresponding decimal + number: error-pg_probackup-1.log for Monday, + error-pg_probackup-2.log for Tuesday, and so on. + + + This option is useful for troubleshooting and monitoring. + + + + + + + + + Defines the directory in which log files will be created. You + must specify the absolute path. This directory is created + lazily, when the first log message is written. + + + Default: $BACKUP_PATH/log/ + + + + + + + + + Maximum size of an individual log file. If this value is + reached, the log file is rotated once a pg_probackup command + is launched, except help and version commands. The zero value + disables size-based rotation. Supported units: kB, MB, GB, TB + (kB by default). + + + Default: 0 + + + + + + + + + Maximum lifetime of an individual log file. If this value is + reached, the log file is rotated once a pg_probackup command + is launched, except help and version commands. The time of the + last log file creation is stored in + $BACKUP_PATH/log/log_rotation. The zero value disables + time-based rotation. Supported units: ms, s, min, h, d (min by + default). + + + Default: 0 + + + + + + + + Connection Options + + You can use these options together with + and + commands. + + + All + libpq + environment variables are supported. + + + + + + +PGDATABASE + + + Specifies the name of the database to connect to. The + connection is used only for managing backup process, so you + can connect to any existing database. If this option is not + provided on the command line, PGDATABASE environment variable, + or the pg_probackup.conf configuration file, pg_probackup + tries to take this value from the PGUSER environment variable, + or from the current user name if PGUSER variable is not set. + + + + + + + +PGHOST + + + Specifies the host name of the system on which the server is + running. If the value begins with a slash, it is used as a + directory for the Unix domain socket. + + + Default: localhost + + + + + + + +PGPORT + + + Specifies the TCP port or the local Unix domain socket file + extension on which the server is listening for connections. + + + Default: 5432 + + + + + + + +PGUSER + + + User name to connect as. + + + + + + + + + + Disables a password prompt. If the server requires password + authentication and a password is not available by other means + such as a + .pgpass + file or PGPASSWORD environment variable, the connection + attempt will fail. This flag can be useful in batch jobs and + scripts where no user is present to enter a password. 
+ + + + + + + + + + + Forces a password prompt. (Deprecated) + + + + + + + + Compression Options + + You can use these options together with + and + commands. + + + + + + + + Defines the algorithm to use for compressing data files. + Possible values are zlib, + pglz, and none. If set + to zlib or pglz, this option enables compression. By default, + compression is disabled. For the + command, the + pglz compression algorithm is not supported. + + + Default: none + + + + + + + + + Defines compression level (0 through 9, 0 being no compression + and 9 being best compression). This option can be used + together with the option. + + + Default: 1 + + + + + + + + + Alias for --compress-algorithm=zlib and + --compress-level=1. + + + + + + + + Archiving Options + + These options can be used with the + command in the + archive_command + setting and the + command in the + restore_command + setting. + + + Additionally, remote mode + options and logging + options can be used. + + + + + + + + Provides the path to the WAL file in + archive_command and + restore_command. Use the %p + variable as the value for this option for correct processing. + + + + + + + + + Provides the name of the WAL file in + archive_command and + restore_command. Use the %f + variable as the value for this option for correct processing. + + + + + + + + + Overwrites archived WAL file. Use this flag together with the + command if + the specified subdirectory of the backup catalog already + contains this WAL file and it needs to be replaced with its + newer copy. Otherwise, archive-push reports that a WAL segment + already exists, and aborts the operation. If the file to + replace has not changed, archive-push skips this file + regardless of the flag. + + + + + + + + + Sets the maximum number of files that can be copied into the archive + by a single archive-push process, or from + the archive by a single archive-get process. + + + + + + + + + Sets the timeout for considering existing .part + files to be stale. By default, pg_probackup + waits 300 seconds. + This option can be used only with command. + + + + + + + + + Do not rename status files in the archive_status directory. + This option should be used only if archive_command + contains multiple commands. + This option can be used only with command. + + + + + + + + + Do not sync copied WAL files to disk. You can use this flag to speed + up archiving process. Using this flag can result in WAL archive + corruption in case of operating system or hardware crash. + This option can be used only with command. + + + + + + + + + Directory used to store prefetched WAL segments if option is used. + Directory must be located on the same filesystem and on the same mountpoint the + PGDATA/pg_wal is located. + By default files are stored in PGDATA/pg_wal/pbk_prefetch directory. + This option can be used only with command. + + + + + + + + + Do not validate prefetched WAL file before using it. + Use this option if you want to increase the speed of recovery. + This option can be used only with command. + + + + + + + + + Remote Mode Options + + This section describes the options related to running + pg_probackup operations remotely via SSH. These options can be + used with , + , + , + , + , and + commands. + + + For details on configuring and using the remote mode, + see and + . + + + + + + + + Specifies the protocol to use for remote operations. Currently + only the SSH protocol is supported. Possible values are: + + + + + ssh enables the remote mode via + SSH. This is the default value. 
+ + + + + none explicitly disables the remote + mode. + + + + + You can omit this option if the + option is specified. + + + + + + + + + + Specifies the remote host IP address or hostname to connect + to. + + + + + + + + + Specifies the remote host port to connect to. + + + Default: 22 + + + + + + + + + Specifies remote host user for SSH connection. If you omit + this option, the current user initiating the SSH connection is + used. + + + + + + + + + Specifies pg_probackup installation directory on the remote + system. + + + + + + + + + Provides a string of SSH command-line options. For example, + the following options can be used to set keep-alive for SSH + connections opened by pg_probackup: + --ssh-options='-o ServerAliveCountMax=5 -o ServerAliveInterval=60'. + For the full list of possible options, see + ssh_config + manual page. + + + + + + + + Remote WAL Archive Options + + This section describes the options used to provide the + arguments for remote mode + options in + used in the + restore_command + command when restoring ARCHIVE backups or performing PITR. + + + + + + + + Provides the argument for the + option in the archive-get command. + + + + + + + + + Provides the argument for the + option in the archive-get command. + + + Default: 22 + + + + + + + + + Provides the argument for the + option in the archive-get command. If you omit + this option, the user that has started the PostgreSQL cluster is used. + + + Default: PostgreSQL user + + + + + + + + Incremental Restore Options + + This section describes the options for incremental cluster restore. + These options can be used with the + command. + + + + + + + + + Specifies the incremental mode to be used. Possible values are: + + + + + CHECKSUM — replace only pages with mismatched checksum and LSN. + + + + + LSN — replace only pages with LSN greater than point of divergence. + + + + + NONE — regular restore. + + + + + + + + + + + Partial Restore Options + + This section describes the options for partial cluster restore. + These options can be used with the + command. + + + + + + + + Specifies the name of the database to exclude from restore. All other + databases in the cluster will be restored as usual, including + template0 and template1. + This option can be specified multiple times for multiple + databases. + + + + + + + + + Specifies the name of the database to restore from a backup. All other + databases in the cluster will not be restored, with the exception + of template0 and + template1. This option can be specified + multiple times for multiple databases. + + + + + + + + Replica Options + + This section describes the options related to taking a backup + from standby. + + + + Starting from pg_probackup 2.0.24, backups can be + taken from standby without connecting to the master server, + so these options are no longer required. In lower versions, + pg_probackup had to connect to the master to determine + recovery time — the earliest moment for which you can + restore a consistent state of the database cluster. + + + + + + + + + Deprecated. Specifies the name of the database on the master + server to connect to. The connection is used only for managing + the backup process, so you can connect to any existing + database. Can be set in the pg_probackup.conf using the + command. + + + Default: postgres, the default PostgreSQL database + + + + + + + + + Deprecated. Specifies the host name of the system on which the + master server is running. + + + + + + + + + Deprecated. 
Specifies the TCP port or the local Unix domain + socket file extension on which the master server is listening + for connections. + + + Default: 5432, the PostgreSQL default port + + + + + + + + + Deprecated. User name to connect as. + + + Default: postgres, + the PostgreSQL default user name + + + + + + + + + + Deprecated. Wait time for WAL segment streaming via + replication, in seconds. By default, pg_probackup waits 300 + seconds. You can also define this parameter in the + pg_probackup.conf configuration file using the + command. + + + Default: 300 sec + + + + + + + + + + + How-To + + All examples below assume the remote mode of operations via + SSH. If you are planning to run backup and + restore operation locally, skip the + Setup passwordless SSH connection step + and omit all options. + + + Examples are based on Ubuntu 18.04, + PostgreSQL 11, and pg_probackup + 2.2.0. + + + + + backupPostgreSQL + role used for connection to PostgreSQL + cluster. + + + + + backupdb — database used for connection + to PostgreSQL cluster. + + + + + backup_host — host with backup catalog. + + + + + backupman — user on + backup_host running all pg_probackup + operations. + + + + + /mnt/backups — directory on + backup_host where backup catalog is stored. + + + + + postgres_host — host with PostgreSQL + cluster. + + + + + postgres — user on + postgres_host that has started the PostgreSQL cluster. + + + + + /var/lib/postgresql/11/mainPostgreSQL + data directory on postgres_host. + + + + + Minimal Setup + + This scenario illustrates setting up standalone FULL and DELTA backups. + + + + Set up passwordless SSH connection from + <literal>backup_host</literal> to + <literal>postgres_host</literal>: + +[backupman@backup_host] ssh-copy-id postgres@postgres_host + + + + Configure your <productname>PostgreSQL</productname> cluster. + + For security purposes, it is recommended to use a separate + database for backup operations. 
+ + +postgres=# +CREATE DATABASE backupdb; + + + Connect to the backupdb database, create the + probackup role, and grant the following + permissions to this role: + + +backupdb=# +BEGIN; +CREATE ROLE backup WITH LOGIN REPLICATION; +GRANT USAGE ON SCHEMA pg_catalog TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean, boolean) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_wal() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_wal_replay_lsn() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup; +GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_checkpoint() TO backup; +COMMIT; + + + + Initialize the backup catalog: + +[backupman@backup_host]$ pg_probackup-11 init -B /mnt/backups +INFO: Backup catalog '/mnt/backups' successfully inited + + + + Add instance <literal>pg-11</literal> to the backup catalog: + +[backupman@backup_host]$ pg_probackup-11 add-instance -B /mnt/backups --instance 'pg-11' --remote-host=postgres_host --remote-user=postgres -D /var/lib/postgresql/11/main +INFO: Instance 'node' successfully inited + + + + Take a FULL backup: + +[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11' -b FULL --stream --remote-host=postgres_host --remote-user=postgres -U backup -d backupdb +INFO: Backup start, pg_probackup version: 2.2.0, instance: node, backup ID: PZ7YK2, backup mode: FULL, wal mode: STREAM, remote: true, compress-algorithm: none, compress-level: 1 +INFO: Start transferring data files +INFO: Data files are transferred +INFO: wait for pg_stop_backup() +INFO: pg_stop backup() successfully executed +INFO: Validating backup PZ7YK2 +INFO: Backup PZ7YK2 data files are valid +INFO: Backup PZ7YK2 resident size: 196MB +INFO: Backup PZ7YK2 completed + + + + Let's take a look at the backup catalog: + +[backupman@backup_host] pg_probackup-11 show -B /mnt/backups --instance 'pg-11' + +BACKUP INSTANCE 'pg-11' +================================================================================================================================== + Instance Version ID Recovery Time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status +================================================================================================================================== + node 11 PZ7YK2 2019-10-11 19:45:45+03 FULL STREAM 1/0 11s 180MB 16MB 1.00 0/3C000028 0/3C000198 OK + + + + Take an incremental backup in the DELTA mode: + +[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11' -b delta --stream --remote-host=postgres_host --remote-user=postgres -U backup -d backupdb +INFO: Backup start, pg_probackup version: 2.2.0, instance: node, backup ID: PZ7YMP, backup mode: DELTA, wal mode: STREAM, remote: true, compress-algorithm: none, compress-level: 1 +INFO: Parent backup: PZ7YK2 +INFO: Start transferring data files +INFO: Data files are transferred +INFO: wait for pg_stop_backup() +INFO: pg_stop backup() successfully executed +INFO: Validating backup PZ7YMP +INFO: Backup PZ7YMP data files are valid +INFO: Backup PZ7YMP resident 
size: 32MB +INFO: Backup PZ7YMP completed + + + + Let's add some parameters to <application>pg_probackup</application> + configuration file, so that you can omit them from the command line: + +[backupman@backup_host] pg_probackup-11 set-config -B /mnt/backups --instance 'pg-11' --remote-host=postgres_host --remote-user=postgres -U backup -d backupdb + + + + Take another incremental backup in the DELTA mode, omitting + some of the previous parameters: + +[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11' -b delta --stream +INFO: Backup start, pg_probackup version: 2.2.0, instance: node, backup ID: PZ7YR5, backup mode: DELTA, wal mode: STREAM, remote: true, compress-algorithm: none, compress-level: 1 +INFO: Parent backup: PZ7YMP +INFO: Start transferring data files +INFO: Data files are transferred +INFO: wait for pg_stop_backup() +INFO: pg_stop backup() successfully executed +INFO: Validating backup PZ7YR5 +INFO: Backup PZ7YR5 data files are valid +INFO: Backup PZ7YR5 resident size: 32MB +INFO: Backup PZ7YR5 completed + + + + Let's take a look at the instance configuration: + +[backupman@backup_host] pg_probackup-11 show-config -B /mnt/backups --instance 'pg-11' + +# Backup instance information +pgdata = /var/lib/postgresql/11/main +system-identifier = 6746586934060931492 +xlog-seg-size = 16777216 +# Connection parameters +pgdatabase = backupdb +pghost = postgres_host +pguser = backup +# Replica parameters +replica-timeout = 5min +# Archive parameters +archive-timeout = 5min +# Logging parameters +log-level-console = INFO +log-level-file = OFF +log-filename = pg_probackup.log +log-rotation-size = 0 +log-rotation-age = 0 +# Retention parameters +retention-redundancy = 0 +retention-window = 0 +wal-depth = 0 +# Compression parameters +compress-algorithm = none +compress-level = 1 +# Remote access parameters +remote-proto = ssh +remote-host = postgres_host + + + Note that we are getting the default values for other options + that were not overwritten by the set-config command. + + + + Let's take a look at the backup catalog: + +[backupman@backup_host] pg_probackup-11 show -B /mnt/backups --instance 'pg-11' + +==================================================================================================================================== + Instance Version ID Recovery Time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status +==================================================================================================================================== + node 11 PZ7YR5 2019-10-11 19:49:56+03 DELTA STREAM 1/1 10s 112kB 32MB 1.00 0/41000028 0/41000160 OK + node 11 PZ7YMP 2019-10-11 19:47:16+03 DELTA STREAM 1/1 10s 376kB 32MB 1.00 0/3E000028 0/3F0000B8 OK + node 11 PZ7YK2 2019-10-11 19:45:45+03 FULL STREAM 1/0 11s 180MB 16MB 1.00 0/3C000028 0/3C000198 OK + + + + + + + + Versioning + + pg_probackup follows + semantic versioning. + + + + + Authors + + + Postgres Professional, Moscow, Russia. + + + + Credits + + pg_probackup utility is based on pg_arman, + which was originally written by NTT and then developed and maintained by Michael Paquier. 
+ + + + + + + diff --git a/doc/probackup.xml b/doc/probackup.xml new file mode 100644 index 000000000..8ea3cdc46 --- /dev/null +++ b/doc/probackup.xml @@ -0,0 +1,12 @@ + +]> + + + + pg_probackup Documentation +&pgprobackup; + + \ No newline at end of file diff --git a/doc/stylesheet.css b/doc/stylesheet.css new file mode 100644 index 000000000..4d84058f5 --- /dev/null +++ b/doc/stylesheet.css @@ -0,0 +1,420 @@ +@import url('https://fonts.googleapis.com/css?family=Roboto:300,400,500,700&subset=cyrillic'); + +body { + font-family: 'Roboto',Arial,sans-serif; +} + +body { + font-size: 18px; + font-weight: 300; +} + +/* ../media/css/docs.css */ +.navheader th { text-align: center; } /* anti-bootstrap */ + +.navheader tbody tr:nth-child(1) th { /* временно убрать ненужную строчку */ + display: none; +} + +/* PostgreSQL.org Documentation Style */ + +.book div.NAVHEADER table { + margin-left: 0; +} + +.book div.NAVHEADER th { + text-align: center; +} + +.book { + font-size: 15px; + line-height: 1.6; +} + +/* Heading Definitions */ + +.book h1, +.book h2, +.book h3 { + font-weight: bold; + margin-top: 2ex; +} + +.book h1 a, +.book h2 a, +.book h3 a, +.book h4 a + { + color: #EC5800; +} + +/* EKa --> */ +.book h1 { + font-size: 1.4em; +} + +.book h2 { + font-size: 1.25em; +} + +.book h3 { + font-size: 1.2em; +} + +.book h4 { + font-size: 1.15em; +} + +.book h5 { + font-size: 1.1em; +} + +.book h6 { + font-size: 1.0em; +} +/* <-- EKa */ + +.book h1 a:hover { + color: #EC5800; + text-decoration: none; +} + +.book h2 a:hover, +.book h3 a:hover, +.book h4 a:hover { + color: #666666; + text-decoration: none; +} + + + +/* Text Styles */ + +.book div.SECT2 { + margin-top: 4ex; +} + +.book div.SECT3 { + margin-top: 3ex; + margin-left: 3ex; +} + +.book .txtCurrentLocation { + font-weight: bold; +} + +.book p, +.book ol, +.book ul, +.book li { + line-height: 1.5em; +} + +.book code { + font-size: 1em; + padding: 0px; + color: #525f6c; + background-color: #FFF; + border-radius: 0px; +} + +.book code, kbd, pre, samp { + font-family: monospace,monospace; +} + +.book .txtCommentsWrap { + border: 2px solid #F5F5F5; + width: 100%; +} + +.book .txtCommentsContent { + background: #F5F5F5; + padding: 3px; +} + +.book .txtCommentsPoster { + float: left; +} + +.book .txtCommentsDate { + float: right; +} + +.book .txtCommentsComment { + padding: 3px; +} + +.book #docContainer pre code, +.book #docContainer pre tt, +.book #docContainer pre pre, +.book #docContainer tt tt, +.book #docContainer tt code, +.book #docContainer tt pre { + font-size: 1em; +} + +.book pre.LITERALLAYOUT, +.book .SCREEN, +.book .SYNOPSIS, +.book .PROGRAMLISTING, +.book .REFSYNOPSISDIV p, +.book table.CAUTION, +.book table.WARNING, +.book blockquote.NOTE, +.book blockquote.TIP, +.book div.note, +.book div.tip, +.book table.CALSTABLE { + -moz-box-shadow: 3px 3px 5px #DFDFDF; + -webkit-box-shadow: 3px 3px 5px #DFDFDF; + -khtml-box-shadow: 3px 3px 5px #DFDFDF; + -o-box-shadow: 3px 3px 5px #DFDFDF; + box-shadow: 3px 3px 5px #DFDFDF; +} + +.book pre.LITERALLAYOUT, +.book .SCREEN, +.book .SYNOPSIS, +.book .PROGRAMLISTING, +.book .REFSYNOPSISDIV p, +.book table.CAUTION, +.book table.WARNING, +.book blockquote.NOTE, +.book blockquote.TIP +.book div.note, +.book div.tip { + color: black; + border-width: 1px; + border-style: solid; + padding: 2ex; + margin: 2ex 0 2ex 2ex; + overflow: auto; + -moz-border-radius: 8px; + -webkit-border-radius: 8px; + -khtml-border-radius: 8px; + border-radius: 8px; +} + +.book div.note, +.book div.tip { + 
-moz-border-radius: 8px !important; + -webkit-border-radius: 8px !important; + -khtml-border-radius: 8px !important; + border-radius: 8px !important; +} + + +.book pre.LITERALLAYOUT, +.book pre.SYNOPSIS, +.book pre.PROGRAMLISTING, +.book .REFSYNOPSISDIV p, +.book .SCREEN { + border-color: #CFCFCF; + background-color: #F7F7F7; +} + +.book blockquote.NOTE, +.book blockquote.TIP, +.book div.note, +.book div.tip { + border-color: #DBDBCC; + background-color: #EEEEDD; + padding: 14px; + width: 572px; +/* font-size: 12px; */ +} + +.book blockquote.NOTE, +.book blockquote.TIP, +.book table.CAUTION, +.book table.WARNING { + margin: 4ex auto; +} + +.book div.note, +.book div.tip { + margin: 4ex auto !important; +} + + +.book blockquote.NOTE p, +.book blockquote.TIP p, +.book div.note p, +.book div.tip p { + margin: 0; +} + +.book blockquote.NOTE pre, +.book blockquote.NOTE code, +.book div.note pre, +.book div.note code, +.book blockquote.TIP pre, +.book blockquote.TIP code, +.book div.tip pre, +.book div.tio code { + margin-left: 0; + margin-right: 0; + -moz-box-shadow: none; + -webkit-box-shadow: none; + -khtml-box-shadow: none; + -o-box-shadow: none; + box-shadow: none; +} + +.book .emphasis, +.book .c2 { + font-weight: bold; +} + +.book .REPLACEABLE { + font-style: italic; +} + +/* Table Styles */ + +.book table { + margin-left: 2ex; +} + +.book table.CALSTABLE td, +.book table.CALSTABLE th, +.book table.CAUTION td, +.book table.CAUTION th, +.book table.WARNING td, +.book table.WARNING th { + border-style: solid; +} + +.book table.CALSTABLE, +.book table.CAUTION, +.book table.WARNING { + border-spacing: 0; + border-collapse: collapse; +} + +.book table.CALSTABLE +{ + margin: 2ex 0 2ex 2ex; + background-color: #E0ECEF; + border: 2px solid #A7C6DF; +} + +.book table.CALSTABLE tr:hover td +{ + background-color: #EFEFEF; +} + +.book table.CALSTABLE td { + background-color: #FFF; +} + +.book table.CALSTABLE td, +.book table.CALSTABLE th { + border: 1px solid #A7C6DF; + padding: 0.5ex 0.5ex; +} + +table.CAUTION, +.book table.WARNING { + border-collapse: separate; + display: block; + padding: 0; + max-width: 600px; +} + +.book table.CAUTION { + background-color: #F5F5DC; + border-color: #DEDFA7; +} + +.book table.WARNING { + background-color: #FFD7D7; + border-color: #DF421E; +} + +.book table.CAUTION td, +.book table.CAUTION th, +.book table.WARNING td, +.book table.WARNING th { + border-width: 0; + padding-left: 2ex; + padding-right: 2ex; +} + +.book table.CAUTION td, +.book table.CAUTION th { + border-color: #F3E4D5 +} + +.book table.WARNING td, +.book table.WARNING th { + border-color: #FFD7D7; +} + +.book td.c1, +.book td.c2, +.book td.c3, +.book td.c4, +.book td.c5, +.book td.c6 { + font-size: 1.1em; + font-weight: bold; + border-bottom: 0px solid #FFEFEF; + padding: 1ex 2ex 0; +} + +.book .table thead { + background: #E0ECEF; + border-bottom: 1px solid #000; +} +.book .table > thead > tr > th { + border-bottom: 1px solid #000; +} + +.book td, th { + padding: 0.1ex 0.5ex; +} + +.book .book table tr:hover td { + background-color: #EFEFEF; +} + +/* Link Styles */ + +.book #docNav a { + font-weight: bold; +} + +.book code.FUNCTION tt { + font-size: 1em; +} + +.book table.docs-compare { + align: center; + width: 90%; + border: 2px solid #999; + border-collapse: collapse; +} + +.book table.docs-compare td { + padding: 12px; + border: 1px solid #DDD; +} + +.book dd { + margin-left: 40px; +} + + +.book .sidebar { + padding: 8px; + background: #FFF; + width: auto; +} + +.book pre { + background: 
#f5f5f5; + padding: 10px; + border: 1px solid #ccc; + border-radius: 4px; +} diff --git a/doc/stylesheet.xsl b/doc/stylesheet.xsl new file mode 100644 index 000000000..466127e9c --- /dev/null +++ b/doc/stylesheet.xsl @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +book/reference toc, title + + + diff --git a/doit.cmd b/doit.cmd deleted file mode 100644 index b46e3b36d..000000000 --- a/doit.cmd +++ /dev/null @@ -1 +0,0 @@ -perl win32build.pl "C:\PgProject\pgwininstall-ee\builddir\distr_X64_10.4.1\postgresql" "C:\PgProject\pgwininstall-ee\builddir\postgresql\postgrespro-enterprise-10.4.1\src" \ No newline at end of file diff --git a/doit96.cmd b/doit96.cmd deleted file mode 100644 index 94d242c99..000000000 --- a/doit96.cmd +++ /dev/null @@ -1 +0,0 @@ -perl win32build96.pl "C:\PgPro96" "C:\PgProject\pg96ee\postgrespro\src" \ No newline at end of file diff --git a/gen_probackup_project.pl b/gen_probackup_project.pl index 3ea79e96c..b78f4699e 100644 --- a/gen_probackup_project.pl +++ b/gen_probackup_project.pl @@ -1,13 +1,18 @@ # -*-perl-*- hey - emacs - this is a perl file -BEGIN{ +# my $currpath = cwd(); + +our $pgsrc; +our $currpath; + +BEGIN { +# path to the pg_probackup dir +$currpath = File::Basename::dirname(Cwd::abs_path($0)); use Cwd; use File::Basename; - -my $pgsrc=""; -if (@ARGV==1) +if (($#ARGV+1)==1) { $pgsrc = shift @ARGV; - if($pgsrc == "--help"){ + if($pgsrc eq "--help"){ print STDERR "Usage $0 pg-source-dir \n"; print STDERR "Like this: \n"; print STDERR "$0 C:/PgProject/postgresql.10dev/postgrespro \n"; @@ -23,14 +28,13 @@ BEGIN chdir($path); chdir("../.."); $pgsrc = cwd(); + $currpath = "contrib/pg_probackup"; } - chdir("$pgsrc/src/tools/msvc"); push(@INC, "$pgsrc/src/tools/msvc"); chdir("../../..") if (-d "../msvc" && -d "../../../src"); } - use Win32; use Carp; use strict; @@ -57,6 +61,16 @@ BEGIN my $libpq; my @unlink_on_exit; +if (-d "src/fe_utils") +{ + $libpgfeutils = 1; +} +else +{ + $libpgfeutils = 0; +} + + use lib "src/tools/msvc"; @@ -84,22 +98,27 @@ BEGIN my $bconf = $ENV{CONFIG} || "Release"; my $msbflags = $ENV{MSBFLAGS} || ""; my $buildwhat = $ARGV[1] || ""; -if (uc($ARGV[0]) eq 'DEBUG') -{ - $bconf = "Debug"; -} -elsif (uc($ARGV[0]) ne "RELEASE") -{ - $buildwhat = $ARGV[0] || ""; -} +# if (uc($ARGV[0]) eq 'DEBUG') +# { +# $bconf = "Debug"; +# } +# elsif (uc($ARGV[0]) ne "RELEASE") +# { +# $buildwhat = $ARGV[0] || ""; +# } + +# printf "currpath=$currpath"; + +# exit(0); # ... and do it system("msbuild pg_probackup.vcxproj /verbosity:normal $msbflags /p:Configuration=$bconf" ); - # report status my $status = $? 
>> 8; +printf("Status: $status\n"); +printf("Output file built in the folder $pgsrc/$bconf/pg_probackup\n"); exit $status; @@ -120,15 +139,19 @@ sub build_pgprobackup $libpq = $solution->AddProject('libpq', 'dll', 'interfaces', 'src/interfaces/libpq'); - $libpgfeutils = $solution->AddProject('libpgfeutils', 'lib', 'misc'); + if ($libpgfeutils) + { + $libpgfeutils = $solution->AddProject('libpgfeutils', 'lib', 'misc'); + } $libpgcommon = $solution->AddProject('libpgcommon', 'lib', 'misc'); $libpgport = $solution->AddProject('libpgport', 'lib', 'misc'); #vvs test my $probackup = - $solution->AddProject('pg_probackup', 'exe', 'pg_probackup'); #, 'contrib/pg_probackup' + $solution->AddProject("pg_probackup", 'exe', "pg_probackup"); #, 'contrib/pg_probackup' + $probackup->AddDefine('FRONTEND'); $probackup->AddFiles( - 'contrib/pg_probackup/src', + "$currpath/src", 'archive.c', 'backup.c', 'catalog.c', @@ -139,49 +162,65 @@ sub build_pgprobackup 'fetch.c', 'help.c', 'init.c', + 'merge.c', 'parsexlog.c', 'pg_probackup.c', 'restore.c', 'show.c', - 'status.c', 'util.c', - 'validate.c' + 'validate.c', + 'checkdb.c', + 'ptrack.c' ); $probackup->AddFiles( - 'contrib/pg_probackup/src/utils', + "$currpath/src/utils", + 'configuration.c', + 'file.c', + 'remote.c', 'json.c', 'logger.c', 'parray.c', 'pgut.c', - 'thread.c' + 'thread.c', + 'remote.c' ); - $probackup->AddFile('src/backend/access/transam/xlogreader.c'); + $probackup->AddFile("$pgsrc/src/backend/access/transam/xlogreader.c"); + $probackup->AddFile("$pgsrc/src/backend/utils/hash/pg_crc.c"); $probackup->AddFiles( - 'src/bin/pg_basebackup', + "$pgsrc/src/bin/pg_basebackup", 'receivelog.c', 'streamutil.c' ); - if (-e 'src/bin/pg_basebackup/walmethods.c') + if (-e "$pgsrc/src/bin/pg_basebackup/walmethods.c") { - $probackup->AddFile('src/bin/pg_basebackup/walmethods.c'); + $probackup->AddFile("$pgsrc/src/bin/pg_basebackup/walmethods.c"); } - $probackup->AddFile('src/bin/pg_rewind/datapagemap.c'); + $probackup->AddFile("$pgsrc/src/bin/pg_rewind/datapagemap.c"); - $probackup->AddFile('src/interfaces/libpq/pthread-win32.c'); + $probackup->AddFile("$pgsrc/src/interfaces/libpq/pthread-win32.c"); + $probackup->AddFile("$pgsrc/src/timezone/strftime.c"); - $probackup->AddIncludeDir('src/bin/pg_basebackup'); - $probackup->AddIncludeDir('src/bin/pg_rewind'); - $probackup->AddIncludeDir('src/interfaces/libpq'); - $probackup->AddIncludeDir('src'); - $probackup->AddIncludeDir('src/port'); + $probackup->AddIncludeDir("$pgsrc/src/bin/pg_basebackup"); + $probackup->AddIncludeDir("$pgsrc/src/bin/pg_rewind"); + $probackup->AddIncludeDir("$pgsrc/src/interfaces/libpq"); + $probackup->AddIncludeDir("$pgsrc/src"); + $probackup->AddIncludeDir("$pgsrc/src/port"); + $probackup->AddIncludeDir("$pgsrc/src/include/portability"); - $probackup->AddIncludeDir('contrib/pg_probackup'); - $probackup->AddIncludeDir('contrib/pg_probackup/src'); - $probackup->AddIncludeDir('contrib/pg_probackup/src/utils'); + $probackup->AddIncludeDir("$currpath"); + $probackup->AddIncludeDir("$currpath/src"); + $probackup->AddIncludeDir("$currpath/src/utils"); - $probackup->AddReference($libpq, $libpgfeutils, $libpgcommon, $libpgport); + if ($libpgfeutils) + { + $probackup->AddReference($libpq, $libpgfeutils, $libpgcommon, $libpgport); + } + else + { + $probackup->AddReference($libpq, $libpgcommon, $libpgport); + } $probackup->AddLibrary('ws2_32.lib'); $probackup->Save(); diff --git a/msvs/pg_probackup.sln b/msvs/pg_probackup.sln deleted file mode 100644 index 2df4b4042..000000000 --- 
a/msvs/pg_probackup.sln +++ /dev/null @@ -1,28 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Express 2013 for Windows Desktop -VisualStudioVersion = 12.0.31101.0 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pg_probackup", "pg_probackup.vcxproj", "{4886B21A-D8CA-4A03-BADF-743B24C88327}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Debug|x64 = Debug|x64 - Release|Win32 = Release|Win32 - Release|x64 = Release|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {4886B21A-D8CA-4A03-BADF-743B24C88327}.Debug|Win32.ActiveCfg = Debug|Win32 - {4886B21A-D8CA-4A03-BADF-743B24C88327}.Debug|Win32.Build.0 = Debug|Win32 - {4886B21A-D8CA-4A03-BADF-743B24C88327}.Debug|x64.ActiveCfg = Debug|x64 - {4886B21A-D8CA-4A03-BADF-743B24C88327}.Debug|x64.Build.0 = Debug|x64 - {4886B21A-D8CA-4A03-BADF-743B24C88327}.Release|Win32.ActiveCfg = Release|Win32 - {4886B21A-D8CA-4A03-BADF-743B24C88327}.Release|Win32.Build.0 = Release|Win32 - {4886B21A-D8CA-4A03-BADF-743B24C88327}.Release|x64.ActiveCfg = Release|x64 - {4886B21A-D8CA-4A03-BADF-743B24C88327}.Release|x64.Build.0 = Release|x64 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/msvs/template.pg_probackup.vcxproj b/msvs/template.pg_probackup.vcxproj deleted file mode 100644 index 46a7b2c24..000000000 --- a/msvs/template.pg_probackup.vcxproj +++ /dev/null @@ -1,212 +0,0 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {4886B21A-D8CA-4A03-BADF-743B24C88327} - Win32Proj - pg_probackup - - - - Application - true - v120 - MultiByte - - - Application - true - v120 - MultiByte - - - Application - false - v120 - true - MultiByte - - - Application - false - v120 - true - MultiByte - - - - - - - - - - - - - - - - - - - true - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;@PGSRC@;$(IncludePath) - @PGROOT@\lib;$(LibraryPath) - - - - true - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;@PGSRC@;$(IncludePath) - @PGROOT@\lib;$(LibraryPath) - - - - false - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;@PGSRC@;$(IncludePath) - @PGROOT@\lib;$(LibraryPath) - - - - false - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;@PGSRC@;$(IncludePath) - @PGROOT@\lib;$(LibraryPath) - - - - - - - Level3 - Disabled - _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - @ADDLIBS32@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - - - - - - - - Level3 - Disabled - _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - @ADDLIBS@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true 
- true - _CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - true - true - @ADDLIBS32@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - %(AdditionalLibraryDirectories) - libc;%(IgnoreSpecificDefaultLibraries) - - - - - Level3 - - - MaxSpeed - true - true - _CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - true - true - @ADDLIBS@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - %(AdditionalLibraryDirectories) - libc;%(IgnoreSpecificDefaultLibraries) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/msvs/template.pg_probackup96.vcxproj b/msvs/template.pg_probackup96.vcxproj deleted file mode 100644 index 46e019ba4..000000000 --- a/msvs/template.pg_probackup96.vcxproj +++ /dev/null @@ -1,210 +0,0 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {4886B21A-D8CA-4A03-BADF-743B24C88327} - Win32Proj - pg_probackup - - - - Application - true - v120 - MultiByte - - - Application - true - v120 - MultiByte - - - Application - false - v120 - true - MultiByte - - - Application - false - v120 - true - MultiByte - - - - - - - - - - - - - - - - - - - true - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;@PGSRC@;$(IncludePath) - @PGROOT@\lib;$(LibraryPath) - - - - true - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;@PGSRC@;$(IncludePath) - @PGROOT@\lib;$(LibraryPath) - - - - false - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;@PGSRC@;$(IncludePath) - @PGROOT@\lib;$(LibraryPath) - - - - false - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;@PGSRC@;$(IncludePath) - @PGROOT@\lib;$(LibraryPath) - - - - - - - Level3 - Disabled - _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - @ADDLIBS32@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - - - - - - - - Level3 - Disabled - _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - @ADDLIBS@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - _CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - true - true - @ADDLIBS32@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - %(AdditionalLibraryDirectories) - libc;%(IgnoreSpecificDefaultLibraries) - - - - - Level3 - - - MaxSpeed - true - true - _CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - true - true - 
@ADDLIBS@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - %(AdditionalLibraryDirectories) - libc;%(IgnoreSpecificDefaultLibraries) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/msvs/template.pg_probackup_2.vcxproj b/msvs/template.pg_probackup_2.vcxproj deleted file mode 100644 index 2fc101a42..000000000 --- a/msvs/template.pg_probackup_2.vcxproj +++ /dev/null @@ -1,203 +0,0 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {4886B21A-D8CA-4A03-BADF-743B24C88327} - Win32Proj - pg_probackup - - - - Application - true - v120 - MultiByte - - - Application - true - v120 - MultiByte - - - Application - false - v120 - true - MultiByte - - - Application - false - v120 - true - MultiByte - - - - - - - - - - - - - - - - - - - true - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;$(IncludePath) - @PGROOT@\lib;@$(LibraryPath) - - - true - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;$(IncludePath) - @PGROOT@\lib;@$(LibraryPath) - - - false - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;$(IncludePath) - @PGROOT@\lib;@$(LibraryPath) - - - false - ../;@PGSRC@\include;@PGSRC@\bin\pg_basebackup;@PGSRC@\bin\pg_rewind;@PGSRC@\include\port\win32_msvc;@PGSRC@\interfaces\libpq;@PGSRC@\include\port\win32;@PGSRC@\port;@ADDINCLUDE@;$(IncludePath) - @PGROOT@\lib;@$(LibraryPath) - - - - - - Level3 - Disabled - _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - @ADDLIBS@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - - - - - - - - Level3 - Disabled - _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - @ADDLIBS@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - _CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - true - true - @ADDLIBS@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - libc;%(IgnoreSpecificDefaultLibraries) - - - - - Level3 - - - MaxSpeed - true - true - _CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - true - - - Console - true - true - true - @ADDLIBS@;libpgfeutils.lib;libpgcommon.lib;libpgport.lib;libpq.lib;ws2_32.lib;%(AdditionalDependencies) - libc;%(IgnoreSpecificDefaultLibraries) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/src/archive.c b/src/archive.c index 8890ccce3..4780db063 100644 --- a/src/archive.c +++ b/src/archive.c @@ -3,111 +3,1733 @@ * archive.c: - pg_probackup specific archive commands for archive backups. 
* * - * Portions Copyright (c) 2017, Postgres Professional + * Portions Copyright (c) 2018-2019, Postgres Professional * *------------------------------------------------------------------------- */ -#include "pg_probackup.h" #include -#include +#include "pg_probackup.h" +#include "utils/thread.h" +#include "instr_time.h" + +static int push_file_internal_uncompressed(const char *wal_file_name, const char *pg_xlog_dir, + const char *archive_dir, bool overwrite, bool no_sync, + uint32 archive_timeout); +#ifdef HAVE_LIBZ +static int push_file_internal_gz(const char *wal_file_name, const char *pg_xlog_dir, + const char *archive_dir, bool overwrite, bool no_sync, + int compress_level, uint32 archive_timeout); +#endif +static void *push_files(void *arg); +static void *get_files(void *arg); +static bool get_wal_file(const char *filename, const char *from_path, const char *to_path, + bool prefetch_mode); +static int get_wal_file_internal(const char *from_path, const char *to_path, FILE *out, + bool is_decompress); +#ifdef HAVE_LIBZ +static const char *get_gz_error(gzFile gzf, int errnum); +#endif +//static void copy_file_attributes(const char *from_path, +// fio_location from_location, +// const char *to_path, fio_location to_location, +// bool unlink_on_error); + +static bool next_wal_segment_exists(TimeLineID tli, XLogSegNo segno, const char *prefetch_dir, uint32 wal_seg_size); +static uint32 run_wal_prefetch(const char *prefetch_dir, const char *archive_dir, TimeLineID tli, + XLogSegNo first_segno, int num_threads, bool inclusive, int batch_size, + uint32 wal_seg_size); +static bool wal_satisfy_from_prefetch(TimeLineID tli, XLogSegNo segno, const char *wal_file_name, + const char *prefetch_dir, const char *absolute_wal_file_path, + uint32 wal_seg_size, bool parse_wal); + +static uint32 maintain_prefetch(const char *prefetch_dir, XLogSegNo first_segno, uint32 wal_seg_size); + +static bool prefetch_stop = false; +static uint32 xlog_seg_size; + +typedef struct +{ + const char *first_filename; + const char *pg_xlog_dir; + const char *archive_dir; + const char *archive_status_dir; + bool overwrite; + bool compress; + bool no_sync; + bool no_ready_rename; + uint32 archive_timeout; + + CompressAlg compress_alg; + int compress_level; + int thread_num; + + parray *files; + + uint32 n_pushed; + uint32 n_skipped; + + /* + * Return value from the thread. + * 0 means there is no error, + * 1 - there is an error. + * 2 - no error, but nothing to push + */ + int ret; +} archive_push_arg; + +typedef struct +{ + const char *prefetch_dir; + const char *archive_dir; + int thread_num; + parray *files; + uint32 n_fetched; +} archive_get_arg; + +typedef struct WALSegno +{ + char name[MAXFNAMELEN]; + volatile pg_atomic_flag lock; +} WALSegno; + +static int push_file(WALSegno *xlogfile, const char *archive_status_dir, + const char *pg_xlog_dir, const char *archive_dir, + bool overwrite, bool no_sync, uint32 archive_timeout, + bool no_ready_rename, bool is_compress, + int compress_level); + +static parray *setup_push_filelist(const char *archive_status_dir, + const char *first_file, int batch_size); /* + * At this point, we already done one roundtrip to archive server + * to get instance config. + * * pg_probackup specific archive command for archive backups - * set archive_command = 'pg_probackup archive-push -B /home/anastasia/backup - * --wal-file-path %p --wal-file-name %f', to move backups into arclog_path. - * Where archlog_path is $BACKUP_PATH/wal/system_id. 
- * Currently it just copies wal files to the new location. - * TODO: Planned options: list the arclog content, - * compute and validate checksums. + * set archive_command to + * 'pg_probackup archive-push -B /home/anastasia/backup --wal-file-name %f', + * to move backups into arclog_path. + * Where archlog_path is $BACKUP_PATH/wal/instance_name */ -int -do_archive_push(char *wal_file_path, char *wal_file_name, bool overwrite) +void +do_archive_push(InstanceConfig *instance, char *wal_file_path, + char *wal_file_name, int batch_size, bool overwrite, + bool no_sync, bool no_ready_rename) { - char backup_wal_file_path[MAXPGPATH]; - char absolute_wal_file_path[MAXPGPATH]; + uint64 i; char current_dir[MAXPGPATH]; - int64 system_id; - pgBackupConfig *config; + char pg_xlog_dir[MAXPGPATH]; + char archive_status_dir[MAXPGPATH]; + uint64 system_id; bool is_compress = false; - if (wal_file_name == NULL && wal_file_path == NULL) - elog(ERROR, "required parameters are not specified: --wal-file-name %%f --wal-file-path %%p"); + /* arrays with meta info for multi threaded backup */ + pthread_t *threads; + archive_push_arg *threads_args; + bool push_isok = true; - if (wal_file_name == NULL) - elog(ERROR, "required parameter not specified: --wal-file-name %%f"); + /* reporting */ + uint32 n_total_pushed = 0; + uint32 n_total_skipped = 0; + uint32 n_total_failed = 0; + instr_time start_time, end_time; + double push_time; + char pretty_time_str[20]; - if (wal_file_path == NULL) - elog(ERROR, "required parameter not specified: --wal-file-path %%p"); + /* files to push in multi-thread mode */ + parray *batch_files = NULL; + int n_threads; + + if (wal_file_name == NULL) + elog(ERROR, "Required parameter is not specified: --wal-file-name %%f"); if (!getcwd(current_dir, sizeof(current_dir))) elog(ERROR, "getcwd() error"); /* verify that archive-push --instance parameter is valid */ - config = readBackupCatalogConfigFile(); system_id = get_system_identifier(current_dir); - if (config->pgdata == NULL) - elog(ERROR, "cannot read pg_probackup.conf for this instance"); + if (instance->pgdata == NULL) + elog(ERROR, "Cannot read pg_probackup.conf for this instance"); - if(system_id != config->system_identifier) + if (system_id != instance->system_identifier) elog(ERROR, "Refuse to push WAL segment %s into archive. Instance parameters mismatch." - "Instance '%s' should have SYSTEM_ID = %ld instead of %ld", - wal_file_name, instance_name, config->system_identifier, system_id); + "Instance '%s' should have SYSTEM_ID = " UINT64_FORMAT " instead of " UINT64_FORMAT, + wal_file_name, instance->name, instance->system_identifier, system_id); + + if (instance->compress_alg == PGLZ_COMPRESS) + elog(ERROR, "Cannot use pglz for WAL compression"); + + join_path_components(pg_xlog_dir, current_dir, XLOGDIR); + join_path_components(archive_status_dir, pg_xlog_dir, "archive_status"); /* Create 'archlog_path' directory. Do nothing if it already exists. 
*/ - dir_create_dir(arclog_path, DIR_PERMISSION); + //fio_mkdir(instance->arclog_path, DIR_PERMISSION, FIO_BACKUP_HOST); - join_path_components(absolute_wal_file_path, current_dir, wal_file_path); - join_path_components(backup_wal_file_path, arclog_path, wal_file_name); +#ifdef HAVE_LIBZ + if (instance->compress_alg == ZLIB_COMPRESS) + is_compress = true; +#endif + + /* Setup filelist and locks */ + batch_files = setup_push_filelist(archive_status_dir, wal_file_name, batch_size); + + n_threads = num_threads; + if (num_threads > parray_num(batch_files)) + n_threads = parray_num(batch_files); + + elog(INFO, "pg_probackup archive-push WAL file: %s, " + "threads: %i/%i, batch: %lu/%i, compression: %s", + wal_file_name, n_threads, num_threads, + parray_num(batch_files), batch_size, + is_compress ? "zlib" : "none"); + + num_threads = n_threads; + + /* Single-thread push + * We don`t want to start multi-thread push, if number of threads in equal to 1, + * or the number of files ready to push is small. + * Multithreading in remote mode isn`t cheap, + * establishing ssh connection can take 100-200ms, so running and terminating + * one thread using generic multithread approach can take + * almost as much time as copying itself. + * TODO: maybe we should be more conservative and force single thread + * push if batch_files array is small. + */ + if (num_threads == 1 || (parray_num(batch_files) == 1)) + { + INSTR_TIME_SET_CURRENT(start_time); + for (i = 0; i < parray_num(batch_files); i++) + { + int rc; + WALSegno *xlogfile = (WALSegno *) parray_get(batch_files, i); + + rc = push_file(xlogfile, archive_status_dir, + pg_xlog_dir, instance->arclog_path, + overwrite, no_sync, + instance->archive_timeout, + no_ready_rename || (strcmp(xlogfile->name, wal_file_name) == 0) ? true : false, + is_compress && IsXLogFileName(xlogfile->name) ? 
true : false, + instance->compress_level); + if (rc == 0) + n_total_pushed++; + else + n_total_skipped++; + } + + push_isok = true; + goto push_done; + } + + /* init thread args with its own segno */ + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (archive_push_arg *) palloc(sizeof(archive_push_arg) * num_threads); + + for (i = 0; i < num_threads; i++) + { + archive_push_arg *arg = &(threads_args[i]); - elog(INFO, "pg_probackup archive-push from %s to %s", absolute_wal_file_path, backup_wal_file_path); + arg->first_filename = wal_file_name; + arg->archive_dir = instance->arclog_path; + arg->pg_xlog_dir = pg_xlog_dir; + arg->archive_status_dir = archive_status_dir; + arg->overwrite = overwrite; + arg->compress = is_compress; + arg->no_sync = no_sync; + arg->no_ready_rename = no_ready_rename; + arg->archive_timeout = instance->archive_timeout; - if (compress_alg == PGLZ_COMPRESS) - elog(ERROR, "pglz compression is not supported"); + arg->compress_alg = instance->compress_alg; + arg->compress_level = instance->compress_level; + arg->files = batch_files; + arg->n_pushed = 0; + arg->n_skipped = 0; + + arg->thread_num = i+1; + /* By default there are some error */ + arg->ret = 1; + } + + /* Run threads */ + INSTR_TIME_SET_CURRENT(start_time); + for (i = 0; i < num_threads; i++) + { + archive_push_arg *arg = &(threads_args[i]); + pthread_create(&threads[i], NULL, push_files, arg); + } + + /* Wait threads */ + for (i = 0; i < num_threads; i++) + { + pthread_join(threads[i], NULL); + if (threads_args[i].ret == 1) + { + push_isok = false; + n_total_failed++; + } + + n_total_pushed += threads_args[i].n_pushed; + n_total_skipped += threads_args[i].n_skipped; + } + + /* Note, that we are leaking memory here, + * because pushing into archive is a very + * time-sensetive operation, so we skip freeing stuff. + */ + +push_done: + fio_disconnect(); + /* calculate elapsed time */ + INSTR_TIME_SET_CURRENT(end_time); + INSTR_TIME_SUBTRACT(end_time, start_time); + push_time = INSTR_TIME_GET_DOUBLE(end_time); + pretty_time_interval(push_time, pretty_time_str, 20); + + if (push_isok) + /* report number of files pushed into archive */ + elog(INFO, "pg_probackup archive-push completed successfully, " + "pushed: %u, skipped: %u, time elapsed: %s", + n_total_pushed, n_total_skipped, pretty_time_str); + else + elog(ERROR, "pg_probackup archive-push failed, " + "pushed: %i, skipped: %u, failed: %u, time elapsed: %s", + n_total_pushed, n_total_skipped, n_total_failed, + pretty_time_str); +} + +/* ------------- INTERNAL FUNCTIONS ---------- */ +/* + * Copy files from pg_wal to archive catalog with possible compression. + */ +static void * +push_files(void *arg) +{ + int i; + int rc; + archive_push_arg *args = (archive_push_arg *) arg; + + my_thread_num = args->thread_num; + + for (i = 0; i < parray_num(args->files); i++) + { + bool no_ready_rename = args->no_ready_rename; + WALSegno *xlogfile = (WALSegno *) parray_get(args->files, i); + + if (!pg_atomic_test_set_flag(&xlogfile->lock)) + continue; + + /* Do not rename ready file of the first file, + * we do this to avoid flooding PostgreSQL log with + * warnings about ready file been missing. 
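
// Illustrative sketch, not from the patch: the claim-a-segment pattern that
// push_files() above (and get_files() later in this file) rely on. Each worker
// walks the shared batch and only processes entries whose flag it wins. C11
// atomic_flag and bare pthreads stand in for pg_atomic_flag and the
// pg_probackup thread helpers, so every name below is illustrative only.
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define N_SEGS    8
#define N_WORKERS 3

typedef struct
{
	char        name[64];
	atomic_flag lock;
} demo_segment;

static demo_segment segs[N_SEGS];

static void *
demo_worker(void *arg)
{
	long worker_id = (long) (intptr_t) arg;

	for (int i = 0; i < N_SEGS; i++)
	{
		/* test-and-set returns the previous value: false means we just
		 * claimed this segment, true means another worker owns it */
		if (atomic_flag_test_and_set(&segs[i].lock))
			continue;
		printf("worker %ld pushes %s\n", worker_id, segs[i].name);
	}
	return NULL;
}

int
main(void)
{
	pthread_t threads[N_WORKERS];

	for (int i = 0; i < N_SEGS; i++)
	{
		snprintf(segs[i].name, sizeof(segs[i].name), "segment_%02d", i);
		atomic_flag_clear(&segs[i].lock);
	}
	for (long i = 0; i < N_WORKERS; i++)
		pthread_create(&threads[i], NULL, demo_worker, (void *) (intptr_t) i);
	for (int i = 0; i < N_WORKERS; i++)
		pthread_join(threads[i], NULL);
	return 0;
}
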
+ */ + if (strcmp(args->first_filename, xlogfile->name) == 0) + no_ready_rename = true; + + rc = push_file(xlogfile, args->archive_status_dir, + args->pg_xlog_dir, args->archive_dir, + args->overwrite, args->no_sync, + args->archive_timeout, no_ready_rename, + /* do not compress .backup, .partial and .history files */ + args->compress && IsXLogFileName(xlogfile->name) ? true : false, + args->compress_level); + + if (rc == 0) + args->n_pushed++; + else + args->n_skipped++; + } + + /* close ssh connection */ + fio_disconnect(); + + args->ret = 0; + return NULL; +} + +int +push_file(WALSegno *xlogfile, const char *archive_status_dir, + const char *pg_xlog_dir, const char *archive_dir, + bool overwrite, bool no_sync, uint32 archive_timeout, + bool no_ready_rename, bool is_compress, + int compress_level) +{ + int rc; + char wal_file_dummy[MAXPGPATH]; + + join_path_components(wal_file_dummy, archive_status_dir, xlogfile->name); + + elog(LOG, "pushing file \"%s\"", xlogfile->name); + + /* If compression is not required, then just copy it as is */ + if (!is_compress) + rc = push_file_internal_uncompressed(xlogfile->name, pg_xlog_dir, + archive_dir, overwrite, no_sync, + archive_timeout); #ifdef HAVE_LIBZ - if (compress_alg == ZLIB_COMPRESS) - is_compress = IsXLogFileName(wal_file_name); + else + rc = push_file_internal_gz(xlogfile->name, pg_xlog_dir, archive_dir, + overwrite, no_sync, compress_level, + archive_timeout); #endif - push_wal_file(absolute_wal_file_path, backup_wal_file_path, is_compress, - overwrite); - elog(INFO, "pg_probackup archive-push completed successfully"); + /* take '--no-ready-rename' flag into account */ + if (!no_ready_rename) + { + char wal_file_ready[MAXPGPATH]; + char wal_file_done[MAXPGPATH]; + + snprintf(wal_file_ready, MAXPGPATH, "%s.%s", wal_file_dummy, "ready"); + snprintf(wal_file_done, MAXPGPATH, "%s.%s", wal_file_dummy, "done"); + + canonicalize_path(wal_file_ready); + canonicalize_path(wal_file_done); + /* It is ok to rename status file in archive_status directory */ + elog(VERBOSE, "Rename \"%s\" to \"%s\"", wal_file_ready, wal_file_done); + + /* do not error out, if rename failed */ + if (fio_rename(wal_file_ready, wal_file_done, FIO_DB_HOST) < 0) + elog(WARNING, "Cannot rename ready file \"%s\" to \"%s\": %s", + wal_file_ready, wal_file_done, strerror(errno)); + } + + return rc; +} + +/* + * Copy non WAL file, such as .backup or .history file, into WAL archive. + * Such files are not compressed. 
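
// Illustrative sketch, not from the patch: the archive_status protocol handled
// at the end of push_file() above. After a segment is archived, its
// "<name>.ready" marker is renamed to "<name>.done" so PostgreSQL stops
// offering the file for archiving; a failed rename is only a warning. Plain
// rename() stands in for fio_rename(), and the helper name is made up here.
#include <errno.h>
#include <stdio.h>
#include <string.h>

static void
mark_segment_done(const char *archive_status_dir, const char *wal_name)
{
	char ready_path[4096];
	char done_path[4096];

	snprintf(ready_path, sizeof(ready_path), "%s/%s.ready", archive_status_dir, wal_name);
	snprintf(done_path, sizeof(done_path), "%s/%s.done", archive_status_dir, wal_name);

	/* do not error out: losing the rename only causes a repeated push later */
	if (rename(ready_path, done_path) < 0)
		fprintf(stderr, "cannot rename \"%s\" to \"%s\": %s\n",
				ready_path, done_path, strerror(errno));
}
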
+ * Returns: + * 0 - file was successfully pushed + * 1 - push was skipped because file already exists in the archive and + * has the same checksum + */ +int +push_file_internal_uncompressed(const char *wal_file_name, const char *pg_xlog_dir, + const char *archive_dir, bool overwrite, bool no_sync, + uint32 archive_timeout) +{ + FILE *in = NULL; + int out = -1; + char *buf = pgut_malloc(OUT_BUF_SIZE); /* 1MB buffer */ + char from_fullpath[MAXPGPATH]; + char to_fullpath[MAXPGPATH]; + /* partial handling */ + struct stat st; + char to_fullpath_part[MAXPGPATH]; + int partial_try_count = 0; + int partial_file_size = 0; + bool partial_is_stale = true; + + /* from path */ + join_path_components(from_fullpath, pg_xlog_dir, wal_file_name); + canonicalize_path(from_fullpath); + /* to path */ + join_path_components(to_fullpath, archive_dir, wal_file_name); + canonicalize_path(to_fullpath); + + /* Open source file for read */ + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + elog(ERROR, "Cannot open source file \"%s\": %s", from_fullpath, strerror(errno)); + + /* disable stdio buffering for input file */ + setvbuf(in, NULL, _IONBF, BUFSIZ); + + /* open destination partial file for write */ + snprintf(to_fullpath_part, sizeof(to_fullpath_part), "%s.part", to_fullpath); + + /* Grab lock by creating temp file in exclusive mode */ + out = fio_open(to_fullpath_part, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, FIO_BACKUP_HOST); + if (out < 0) + { + if (errno != EEXIST) + elog(ERROR, "Failed to open temp WAL file \"%s\": %s", + to_fullpath_part, strerror(errno)); + /* Already existing destination temp file is not an error condition */ + } + else + goto part_opened; + + /* + * Partial file already exists, it could have happened due to: + * 1. failed archive-push + * 2. concurrent archiving + * + * For ARCHIVE_TIMEOUT period we will try to create partial file + * and look for the size of already existing partial file, to + * determine if it is changing or not. + * If after ARCHIVE_TIMEOUT we still failed to create partial + * file, we will make a decision about discarding + * already existing partial file. + */ + + while (partial_try_count < archive_timeout) + { + if (fio_stat(to_fullpath_part, &st, false, FIO_BACKUP_HOST) < 0) + { + if (errno == ENOENT) + { + //part file is gone, lets try to grab it + out = fio_open(to_fullpath_part, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, FIO_BACKUP_HOST); + if (out < 0) + { + if (errno != EEXIST) + elog(ERROR, "Failed to open temp WAL file \"%s\": %s", + to_fullpath_part, strerror(errno)); + } + else + /* Successfully created partial file */ + break; + } + else + elog(ERROR, "Cannot stat temp WAL file \"%s\": %s", to_fullpath_part, strerror(errno)); + } + + /* first round */ + if (!partial_try_count) + { + elog(LOG, "Temp WAL file already exists, waiting on it %u seconds: \"%s\"", + archive_timeout, to_fullpath_part); + partial_file_size = st.st_size; + } + + /* file size is changing */ + if (st.st_size > partial_file_size) + partial_is_stale = false; + + sleep(1); + partial_try_count++; + } + /* The possible exit conditions: + * 1. File is grabbed + * 2. File is not grabbed, and it is not stale + * 2. File is not grabbed, and it is stale. + */ + + /* + * If temp file was not grabbed for ARCHIVE_TIMEOUT and temp file is not stale, + * then exit with error. 
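
// Illustrative sketch, not from the patch: the ".part" locking scheme used
// above. The temp file is created with O_CREAT | O_EXCL to take the lock; if
// it already exists, its size is polled for up to a timeout, and a file that
// never grows is treated as a stale leftover and reused. Plain open()/stat()
// stand in for fio_open()/fio_stat(); error reporting is reduced to -1.
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <sys/stat.h>
#include <unistd.h>

static int
grab_part_file(const char *part_path, unsigned timeout_sec)
{
	struct stat st;
	off_t       last_size = -1;
	bool        is_stale = true;
	int         fd = open(part_path, O_RDWR | O_CREAT | O_EXCL, 0600);

	if (fd >= 0 || errno != EEXIST)
		return fd;                      /* got the lock, or a hard error */

	for (unsigned i = 0; i < timeout_sec; i++)
	{
		if (stat(part_path, &st) < 0)
		{
			if (errno != ENOENT)
				return -1;              /* hard error */
			/* previous owner finished or died: try to grab it again */
			fd = open(part_path, O_RDWR | O_CREAT | O_EXCL, 0600);
			if (fd >= 0 || errno != EEXIST)
				return fd;
		}
		else
		{
			if (last_size >= 0 && st.st_size > last_size)
				is_stale = false;       /* somebody is still writing it */
			last_size = st.st_size;
		}
		sleep(1);
	}

	if (!is_stale)
		return -1;                      /* active writer, give up */

	/* considered stale: drop the leftover and take over */
	unlink(part_path);
	return open(part_path, O_RDWR | O_CREAT | O_EXCL, 0600);
}
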
+ */ + if (out < 0) + { + if (!partial_is_stale) + elog(ERROR, "Failed to open temp WAL file \"%s\" in %i seconds", + to_fullpath_part, archive_timeout); + + /* Partial segment is considered stale, so reuse it */ + elog(LOG, "Reusing stale temp WAL file \"%s\"", to_fullpath_part); + fio_unlink(to_fullpath_part, FIO_BACKUP_HOST); + + out = fio_open(to_fullpath_part, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, FIO_BACKUP_HOST); + if (out < 0) + elog(ERROR, "Cannot open temp WAL file \"%s\": %s", to_fullpath_part, strerror(errno)); + } + +part_opened: + elog(VERBOSE, "Temp WAL file successfully created: \"%s\"", to_fullpath_part); + /* Check if possible to skip copying */ + if (fileExists(to_fullpath, FIO_BACKUP_HOST)) + { + pg_crc32 crc32_src; + pg_crc32 crc32_dst; + + crc32_src = fio_get_crc32(from_fullpath, FIO_DB_HOST, false); + crc32_dst = fio_get_crc32(to_fullpath, FIO_BACKUP_HOST, false); + + if (crc32_src == crc32_dst) + { + elog(LOG, "WAL file already exists in archive with the same " + "checksum, skip pushing: \"%s\"", from_fullpath); + /* cleanup */ + fclose(in); + fio_close(out); + fio_unlink(to_fullpath_part, FIO_BACKUP_HOST); + return 1; + } + else + { + if (overwrite) + elog(LOG, "WAL file already exists in archive with " + "different checksum, overwriting: \"%s\"", to_fullpath); + else + { + /* Overwriting is forbidden, + * so we must unlink partial file and exit with error. + */ + fio_unlink(to_fullpath_part, FIO_BACKUP_HOST); + elog(ERROR, "WAL file already exists in archive with " + "different checksum: \"%s\"", to_fullpath); + } + } + } + + /* copy content */ + for (;;) + { + size_t read_len = 0; + + read_len = fread(buf, 1, OUT_BUF_SIZE, in); + + if (ferror(in)) + { + fio_unlink(to_fullpath_part, FIO_BACKUP_HOST); + elog(ERROR, "Cannot read source file \"%s\": %s", + from_fullpath, strerror(errno)); + } + + if (read_len > 0 && fio_write(out, buf, read_len) != read_len) + { + fio_unlink(to_fullpath_part, FIO_BACKUP_HOST); + elog(ERROR, "Cannot write to destination temp file \"%s\": %s", + to_fullpath_part, strerror(errno)); + } + + if (feof(in)) + break; + } + + /* close source file */ + fclose(in); + + /* close temp file */ + if (fio_close(out) != 0) + { + fio_unlink(to_fullpath_part, FIO_BACKUP_HOST); + elog(ERROR, "Cannot close temp WAL file \"%s\": %s", + to_fullpath_part, strerror(errno)); + } + + /* sync temp file to disk */ + if (!no_sync) + { + if (fio_sync(to_fullpath_part, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Failed to sync file \"%s\": %s", + to_fullpath_part, strerror(errno)); + } + + elog(VERBOSE, "Rename \"%s\" to \"%s\"", to_fullpath_part, to_fullpath); + + //copy_file_attributes(from_path, FIO_DB_HOST, to_path_temp, FIO_BACKUP_HOST, true); + + /* Rename temp file to destination file */ + if (fio_rename(to_fullpath_part, to_fullpath, FIO_BACKUP_HOST) < 0) + { + fio_unlink(to_fullpath_part, FIO_BACKUP_HOST); + elog(ERROR, "Cannot rename file \"%s\" to \"%s\": %s", + to_fullpath_part, to_fullpath, strerror(errno)); + } + + pg_free(buf); + return 0; +} + +#ifdef HAVE_LIBZ +/* + * Push WAL segment into archive and apply streaming compression to it. 
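
// Illustrative sketch, not from the patch: the skip check performed above once
// the ".part" lock is held. If the archived copy already exists and matches
// the source checksum, the push is skipped. zlib's crc32() is used here as a
// stand-in for fio_get_crc32()/pg_crc32, so only the control flow mirrors the
// patch, not the checksum itself; both helper names are made up.
#include <stdbool.h>
#include <stdio.h>
#include <zlib.h>

static bool
file_crc32(const char *path, unsigned long *crc_out)
{
	unsigned char buf[64 * 1024];
	unsigned long crc = crc32(0L, Z_NULL, 0);
	size_t        len;
	bool          ok;
	FILE         *f = fopen(path, "rb");

	if (f == NULL)
		return false;
	while ((len = fread(buf, 1, sizeof(buf), f)) > 0)
		crc = crc32(crc, buf, (uInt) len);
	ok = !ferror(f);
	fclose(f);
	*crc_out = crc;
	return ok;
}

/* returns true when pushing can be skipped */
static bool
can_skip_push(const char *src_path, const char *archived_path)
{
	unsigned long src_crc;
	unsigned long dst_crc;

	if (!file_crc32(archived_path, &dst_crc))
		return false;           /* nothing archived yet (or unreadable) */
	if (!file_crc32(src_path, &src_crc))
		return false;           /* let the real copy path report the error */
	return src_crc == dst_crc;
}
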
+ * Returns: + * 0 - file was successfully pushed + * 1 - push was skipped because file already exists in the archive and + * has the same checksum + */ +int +push_file_internal_gz(const char *wal_file_name, const char *pg_xlog_dir, + const char *archive_dir, bool overwrite, bool no_sync, + int compress_level, uint32 archive_timeout) +{ + FILE *in = NULL; + gzFile out = NULL; + char *buf = pgut_malloc(OUT_BUF_SIZE); + char from_fullpath[MAXPGPATH]; + char to_fullpath[MAXPGPATH]; + char to_fullpath_gz[MAXPGPATH]; + + /* partial handling */ + struct stat st; + + char to_fullpath_gz_part[MAXPGPATH]; + int partial_try_count = 0; + int partial_file_size = 0; + bool partial_is_stale = true; + + /* from path */ + join_path_components(from_fullpath, pg_xlog_dir, wal_file_name); + canonicalize_path(from_fullpath); + /* to path */ + join_path_components(to_fullpath, archive_dir, wal_file_name); + canonicalize_path(to_fullpath); + + /* destination file with .gz suffix */ + snprintf(to_fullpath_gz, sizeof(to_fullpath_gz), "%s.gz", to_fullpath); + /* destination temp file */ + snprintf(to_fullpath_gz_part, sizeof(to_fullpath_gz_part), "%s.part", to_fullpath_gz); + + /* Open source file for read */ + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + elog(ERROR, "Cannot open source WAL file \"%s\": %s", + from_fullpath, strerror(errno)); + + /* disable stdio buffering for input file */ + setvbuf(in, NULL, _IONBF, BUFSIZ); + + /* Grab lock by creating temp file in exclusive mode */ + out = fio_gzopen(to_fullpath_gz_part, PG_BINARY_W, compress_level, FIO_BACKUP_HOST); + if (out == NULL) + { + if (errno != EEXIST) + elog(ERROR, "Cannot open temp WAL file \"%s\": %s", + to_fullpath_gz_part, strerror(errno)); + /* Already existing destination temp file is not an error condition */ + } + else + goto part_opened; + + /* + * Partial file already exists, it could have happened due to: + * 1. failed archive-push + * 2. concurrent archiving + * + * For ARCHIVE_TIMEOUT period we will try to create partial file + * and look for the size of already existing partial file, to + * determine if it is changing or not. + * If after ARCHIVE_TIMEOUT we still failed to create partial + * file, we will make a decision about discarding + * already existing partial file. + */ + + while (partial_try_count < archive_timeout) + { + if (fio_stat(to_fullpath_gz_part, &st, false, FIO_BACKUP_HOST) < 0) + { + if (errno == ENOENT) + { + //part file is gone, lets try to grab it + out = fio_gzopen(to_fullpath_gz_part, PG_BINARY_W, compress_level, FIO_BACKUP_HOST); + if (out == NULL) + { + if (errno != EEXIST) + elog(ERROR, "Failed to open temp WAL file \"%s\": %s", + to_fullpath_gz_part, strerror(errno)); + } + else + /* Successfully created partial file */ + break; + } + else + elog(ERROR, "Cannot stat temp WAL file \"%s\": %s", + to_fullpath_gz_part, strerror(errno)); + } + + /* first round */ + if (!partial_try_count) + { + elog(LOG, "Temp WAL file already exists, waiting on it %u seconds: \"%s\"", + archive_timeout, to_fullpath_gz_part); + partial_file_size = st.st_size; + } + + /* file size is changing */ + if (st.st_size > partial_file_size) + partial_is_stale = false; + + sleep(1); + partial_try_count++; + } + /* The possible exit conditions: + * 1. File is grabbed + * 2. File is not grabbed, and it is not stale + * 2. File is not grabbed, and it is stale. + */ + + /* + * If temp file was not grabbed for ARCHIVE_TIMEOUT and temp file is not stale, + * then exit with error. 
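
// Illustrative sketch, not from the patch: the streaming compression that
// push_file_internal_gz() performs. The source is read in chunks and fed to a
// gzFile opened for writing at the requested compression level. Plain zlib
// gzopen()/gzwrite() stand in for fio_gzopen()/fio_gzwrite(); error handling
// is reduced to returning -1 and the helper name is made up.
#include <stdio.h>
#include <zlib.h>

static int
gz_copy_file(const char *src_path, const char *dst_gz_path, int compress_level)
{
	char   buf[64 * 1024];
	char   mode[8];
	size_t read_len;
	gzFile out;
	FILE  *in = fopen(src_path, "rb");

	if (in == NULL)
		return -1;

	snprintf(mode, sizeof(mode), "wb%d", compress_level);  /* e.g. "wb1".."wb9" */
	out = gzopen(dst_gz_path, mode);
	if (out == NULL)
	{
		fclose(in);
		return -1;
	}

	while ((read_len = fread(buf, 1, sizeof(buf), in)) > 0)
	{
		if (gzwrite(out, buf, (unsigned) read_len) != (int) read_len)
		{
			fclose(in);
			gzclose(out);
			return -1;
		}
	}

	if (ferror(in))
	{
		fclose(in);
		gzclose(out);
		return -1;
	}
	fclose(in);
	return gzclose(out) == Z_OK ? 0 : -1;
}
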
+ */ + if (out == NULL) + { + if (!partial_is_stale) + elog(ERROR, "Failed to open temp WAL file \"%s\" in %i seconds", + to_fullpath_gz_part, archive_timeout); + + /* Partial segment is considered stale, so reuse it */ + elog(LOG, "Reusing stale temp WAL file \"%s\"", to_fullpath_gz_part); + fio_unlink(to_fullpath_gz_part, FIO_BACKUP_HOST); + + out = fio_gzopen(to_fullpath_gz_part, PG_BINARY_W, compress_level, FIO_BACKUP_HOST); + if (out == NULL) + elog(ERROR, "Cannot open temp WAL file \"%s\": %s", + to_fullpath_gz_part, strerror(errno)); + } + +part_opened: + elog(VERBOSE, "Temp WAL file successfully created: \"%s\"", to_fullpath_gz_part); + /* Check if possible to skip copying, + */ + if (fileExists(to_fullpath_gz, FIO_BACKUP_HOST)) + { + pg_crc32 crc32_src; + pg_crc32 crc32_dst; + + /* TODO: what if one of them goes missing? */ + crc32_src = fio_get_crc32(from_fullpath, FIO_DB_HOST, false); + crc32_dst = fio_get_crc32(to_fullpath_gz, FIO_BACKUP_HOST, true); + + if (crc32_src == crc32_dst) + { + elog(LOG, "WAL file already exists in archive with the same " + "checksum, skip pushing: \"%s\"", from_fullpath); + /* cleanup */ + fclose(in); + fio_gzclose(out); + fio_unlink(to_fullpath_gz_part, FIO_BACKUP_HOST); + return 1; + } + else + { + if (overwrite) + elog(LOG, "WAL file already exists in archive with " + "different checksum, overwriting: \"%s\"", to_fullpath_gz); + else + { + /* Overwriting is forbidden, + * so we must unlink partial file and exit with error. + */ + fio_unlink(to_fullpath_gz_part, FIO_BACKUP_HOST); + elog(ERROR, "WAL file already exists in archive with " + "different checksum: \"%s\"", to_fullpath_gz); + } + } + } + + /* copy content */ + for (;;) + { + size_t read_len = 0; + + read_len = fread(buf, 1, OUT_BUF_SIZE, in); + + if (ferror(in)) + { + fio_unlink(to_fullpath_gz_part, FIO_BACKUP_HOST); + elog(ERROR, "Cannot read from source file \"%s\": %s", + from_fullpath, strerror(errno)); + } + + if (read_len > 0 && fio_gzwrite(out, buf, read_len) != read_len) + { + fio_unlink(to_fullpath_gz_part, FIO_BACKUP_HOST); + elog(ERROR, "Cannot write to compressed temp WAL file \"%s\": %s", + to_fullpath_gz_part, get_gz_error(out, errno)); + } + + if (feof(in)) + break; + } + + /* close source file */ + fclose(in); + + /* close temp file */ + if (fio_gzclose(out) != 0) + { + fio_unlink(to_fullpath_gz_part, FIO_BACKUP_HOST); + elog(ERROR, "Cannot close compressed temp WAL file \"%s\": %s", + to_fullpath_gz_part, strerror(errno)); + } + + /* sync temp file to disk */ + if (!no_sync) + { + if (fio_sync(to_fullpath_gz_part, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Failed to sync file \"%s\": %s", + to_fullpath_gz_part, strerror(errno)); + } + + elog(VERBOSE, "Rename \"%s\" to \"%s\"", + to_fullpath_gz_part, to_fullpath_gz); + + //copy_file_attributes(from_path, FIO_DB_HOST, to_path_temp, FIO_BACKUP_HOST, true); + + /* Rename temp file to destination file */ + if (fio_rename(to_fullpath_gz_part, to_fullpath_gz, FIO_BACKUP_HOST) < 0) + { + fio_unlink(to_fullpath_gz_part, FIO_BACKUP_HOST); + elog(ERROR, "Cannot rename file \"%s\" to \"%s\": %s", + to_fullpath_gz_part, to_fullpath_gz, strerror(errno)); + } + + pg_free(buf); return 0; } +#endif + +#ifdef HAVE_LIBZ +/* + * Show error during work with compressed file + */ +static const char * +get_gz_error(gzFile gzf, int errnum) +{ + int gz_errnum; + const char *errmsg; + + errmsg = fio_gzerror(gzf, &gz_errnum); + if (gz_errnum == Z_ERRNO) + return strerror(errnum); + else + return errmsg; +} +#endif + +/* Copy file attributes */ +//static 
void +//copy_file_attributes(const char *from_path, fio_location from_location, +// const char *to_path, fio_location to_location, +// bool unlink_on_error) +//{ +// struct stat st; +// +// if (fio_stat(from_path, &st, true, from_location) == -1) +// { +// if (unlink_on_error) +// fio_unlink(to_path, to_location); +// elog(ERROR, "Cannot stat file \"%s\": %s", +// from_path, strerror(errno)); +// } +// +// if (fio_chmod(to_path, st.st_mode, to_location) == -1) +// { +// if (unlink_on_error) +// fio_unlink(to_path, to_location); +// elog(ERROR, "Cannot change mode of file \"%s\": %s", +// to_path, strerror(errno)); +// } +//} + +/* Look for files with '.ready' suffix in archive_status directory + * and pack such files into batch sized array. + */ +parray * +setup_push_filelist(const char *archive_status_dir, const char *first_file, + int batch_size) +{ + int i; + WALSegno *xlogfile = NULL; + parray *status_files = NULL; + parray *batch_files = parray_new(); + + /* guarantee that first filename is in batch list */ + xlogfile = palloc(sizeof(WALSegno)); + pg_atomic_init_flag(&xlogfile->lock); + snprintf(xlogfile->name, MAXFNAMELEN, "%s", first_file); + parray_append(batch_files, xlogfile); + + if (batch_size < 2) + return batch_files; + + /* get list of files from archive_status */ + status_files = parray_new(); + dir_list_file(status_files, archive_status_dir, false, false, false, false, true, 0, FIO_DB_HOST); + parray_qsort(status_files, pgFileCompareName); + + for (i = 0; i < parray_num(status_files); i++) + { + int result = 0; + char filename[MAXFNAMELEN]; + char suffix[MAXFNAMELEN]; + pgFile *file = (pgFile *) parray_get(status_files, i); + + result = sscanf(file->name, "%[^.]%s", (char *) &filename, (char *) &suffix); + + if (result != 2) + continue; + + if (strcmp(suffix, ".ready") != 0) + continue; + + /* first filename already in batch list */ + if (strcmp(filename, first_file) == 0) + continue; + + xlogfile = palloc(sizeof(WALSegno)); + pg_atomic_init_flag(&xlogfile->lock); + + snprintf(xlogfile->name, MAXFNAMELEN, "%s", filename); + parray_append(batch_files, xlogfile); + + if (parray_num(batch_files) >= batch_size) + break; + } + + /* cleanup */ + parray_walk(status_files, pgFileFree); + parray_free(status_files); + + return batch_files; +} /* * pg_probackup specific restore command. * Move files from arclog_path to pgdata/wal_file_path. + * + * The problem with archive-get: we must be very careful about + * erroring out, because postgres will interpretent our negative exit code + * as the fact, that requested file is missing and may take irreversible actions. + * So if file copying has failed we must retry several times before bailing out. + * + * TODO: add support of -D option. + * TOTHINK: what can be done about ssh connection been broken? + * TOTHINk: do we need our own rmtree function ? + * TOTHINk: so sort of async prefetch ? 
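
// Illustrative sketch, not from the patch: what setup_push_filelist() above
// does with archive_status. Entries carrying a ".ready" suffix are collected
// (suffix stripped) until the batch is full. Plain opendir()/readdir() stand
// in for dir_list_file(), and matches are printed instead of being packed into
// a parray of WALSegno; the helper name is made up.
#include <dirent.h>
#include <stdio.h>
#include <string.h>

static int
list_ready_segments(const char *archive_status_dir, int batch_size)
{
	DIR           *dir = opendir(archive_status_dir);
	struct dirent *ent;
	const char    *suffix = ".ready";
	size_t         suffix_len = strlen(suffix);
	int            n_found = 0;

	if (dir == NULL)
		return 0;

	while ((ent = readdir(dir)) != NULL && n_found < batch_size)
	{
		size_t len = strlen(ent->d_name);

		if (len <= suffix_len ||
			strcmp(ent->d_name + len - suffix_len, suffix) != 0)
			continue;

		/* strip ".ready" to get the WAL segment name */
		printf("ready for push: %.*s\n", (int) (len - suffix_len), ent->d_name);
		n_found++;
	}
	closedir(dir);
	return n_found;
}
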
+ */ -int -do_archive_get(char *wal_file_path, char *wal_file_name) +void +do_archive_get(InstanceConfig *instance, const char *prefetch_dir_arg, + char *wal_file_path, char *wal_file_name, int batch_size, + bool validate_wal) { - char backup_wal_file_path[MAXPGPATH]; - char absolute_wal_file_path[MAXPGPATH]; - char current_dir[MAXPGPATH]; + int fail_count = 0; + char backup_wal_file_path[MAXPGPATH]; + char absolute_wal_file_path[MAXPGPATH]; + char current_dir[MAXPGPATH]; + char prefetch_dir[MAXPGPATH]; + char pg_xlog_dir[MAXPGPATH]; + char prefetched_file[MAXPGPATH]; + + /* reporting */ + uint32 n_fetched = 0; + int n_actual_threads = num_threads; + uint32 n_files_in_prefetch = 0; - if (wal_file_name == NULL && wal_file_path == NULL) - elog(ERROR, "required parameters are not specified: --wal-file-name %%f --wal-file-path %%p"); + /* time reporting */ + instr_time start_time, end_time; + double get_time; + char pretty_time_str[20]; if (wal_file_name == NULL) - elog(ERROR, "required parameter not specified: --wal-file-name %%f"); + elog(ERROR, "Required parameter not specified: --wal-file-name %%f"); if (wal_file_path == NULL) - elog(ERROR, "required parameter not specified: --wal-file-path %%p"); + elog(ERROR, "Required parameter not specified: --wal_file_path %%p"); if (!getcwd(current_dir, sizeof(current_dir))) elog(ERROR, "getcwd() error"); + /* path to PGDATA/pg_wal directory */ + join_path_components(pg_xlog_dir, current_dir, XLOGDIR); + + /* destination full filepath, usually it is PGDATA/pg_wal/RECOVERYXLOG */ join_path_components(absolute_wal_file_path, current_dir, wal_file_path); - join_path_components(backup_wal_file_path, arclog_path, wal_file_name); - elog(INFO, "pg_probackup archive-get from %s to %s", - backup_wal_file_path, absolute_wal_file_path); - get_wal_file(backup_wal_file_path, absolute_wal_file_path); - elog(INFO, "pg_probackup archive-get completed successfully"); + /* full filepath to WAL file in archive directory. + * backup_path/wal/instance_name/000000010000000000000001 */ + join_path_components(backup_wal_file_path, instance->arclog_path, wal_file_name); - return 0; + INSTR_TIME_SET_CURRENT(start_time); + if (num_threads > batch_size) + n_actual_threads = batch_size; + elog(INFO, "pg_probackup archive-get WAL file: %s, remote: %s, threads: %i/%i, batch: %i", + wal_file_name, IsSshProtocol() ? "ssh" : "none", n_actual_threads, num_threads, batch_size); + + num_threads = n_actual_threads; + + elog(VERBOSE, "Obtaining XLOG_SEG_SIZE from pg_control file"); + instance->xlog_seg_size = get_xlog_seg_size(current_dir); + + /* Prefetch optimization kicks in only if simple XLOG segments is requested + * and batching is enabled. + * + * We check that file do exists in prefetch directory, then we validate it and + * rename to destination path. + * If file do not exists, then we run prefetch and rename it. + */ + if (IsXLogFileName(wal_file_name) && batch_size > 1) + { + XLogSegNo segno; + TimeLineID tli; + + GetXLogFromFileName(wal_file_name, &tli, &segno, instance->xlog_seg_size); + + if (prefetch_dir_arg) + /* use provided prefetch directory */ + snprintf(prefetch_dir, sizeof(prefetch_dir), "%s", prefetch_dir_arg); + else + /* use default path */ + join_path_components(prefetch_dir, pg_xlog_dir, "pbk_prefetch"); + + /* Construct path to WAL file in prefetch directory. 
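
// Illustrative sketch, not from the patch: how a WAL segment file name maps to
// (timeline, segno, segment size), the relation behind GetXLogFromFileName()
// above and next_wal_segment_exists() below. The 24-hex-digit name is the
// timeline followed by segno split by the number of segments per 4GB xlogid.
// Both helper names below are made up; passing a 16MB segment size to
// next_segment_is_prefetched() would be an assumption, the patch reads the
// real value from pg_control.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static void
wal_segment_name(char *buf, size_t buflen, uint32_t tli,
                 uint64_t segno, uint32_t wal_seg_size)
{
	uint64_t segs_per_xlogid = UINT64_C(0x100000000) / wal_seg_size;

	snprintf(buf, buflen, "%08X%08X%08X",
	         (unsigned int) tli,
	         (unsigned int) (segno / segs_per_xlogid),
	         (unsigned int) (segno % segs_per_xlogid));
}

static bool
next_segment_is_prefetched(const char *prefetch_dir, uint32_t tli,
                           uint64_t segno, uint32_t wal_seg_size)
{
	char name[64];
	char path[4096];

	wal_segment_name(name, sizeof(name), tli, segno + 1, wal_seg_size);
	snprintf(path, sizeof(path), "%s/%s", prefetch_dir, name);
	return access(path, F_OK) == 0;
}
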
+ * current_dir/pg_wal/pbk_prefech/000000010000000000000001 + */ + join_path_components(prefetched_file, prefetch_dir, wal_file_name); + + /* check if file is available in prefetch directory */ + if (access(prefetched_file, F_OK) == 0) + { + /* Prefetched WAL segment is available, before using it, we must validate it. + * But for validation to work properly(because of contrecord), we must be sure + * that next WAL segment is also available in prefetch directory. + * If next segment do not exists in prefetch directory, we must provide it from + * archive. If it is NOT available in the archive, then file in prefetch directory + * cannot be trusted. In this case we discard all prefetched files and + * copy requested file directly from archive. + */ + if (!next_wal_segment_exists(tli, segno, prefetch_dir, instance->xlog_seg_size)) + n_fetched = run_wal_prefetch(prefetch_dir, instance->arclog_path, + tli, segno, num_threads, false, batch_size, + instance->xlog_seg_size); + + n_files_in_prefetch = maintain_prefetch(prefetch_dir, segno, instance->xlog_seg_size); + + if (wal_satisfy_from_prefetch(tli, segno, wal_file_name, prefetch_dir, + absolute_wal_file_path, instance->xlog_seg_size, + validate_wal)) + { + n_files_in_prefetch--; + elog(INFO, "pg_probackup archive-get used prefetched WAL segment %s, prefetch state: %u/%u", + wal_file_name, n_files_in_prefetch, batch_size); + goto get_done; + } + else + { + /* discard prefetch */ +// n_fetched = 0; + pgut_rmtree(prefetch_dir, false, false); + } + } + else + { + /* Do prefetch maintenance here */ + + mkdir(prefetch_dir, DIR_PERMISSION); /* In case prefetch directory do not exists yet */ + + /* We`ve failed to satisfy current request from prefetch directory, + * therefore we can discard its content, since it may be corrupted or + * contain stale files. + * + * UPDATE: we should not discard prefetch easily, because failing to satisfy + * request for WAL may come from this recovery behavior: + * https://www.postgresql.org/message-id/flat/16159-f5a34a3a04dc67e0%40postgresql.org + */ +// rmtree(prefetch_dir, false); + + /* prefetch files */ + n_fetched = run_wal_prefetch(prefetch_dir, instance->arclog_path, + tli, segno, num_threads, true, batch_size, + instance->xlog_seg_size); + + n_files_in_prefetch = maintain_prefetch(prefetch_dir, segno, instance->xlog_seg_size); + + if (wal_satisfy_from_prefetch(tli, segno, wal_file_name, prefetch_dir, absolute_wal_file_path, + instance->xlog_seg_size, validate_wal)) + { + n_files_in_prefetch--; + elog(INFO, "pg_probackup archive-get copied WAL file %s, prefetch state: %u/%u", + wal_file_name, n_files_in_prefetch, batch_size); + goto get_done; + } +// else +// { +// /* yet again failed to satisfy request from prefetch */ +// n_fetched = 0; +// rmtree(prefetch_dir, false); +// } + } + } + + /* we use it to extend partial file later */ + xlog_seg_size = instance->xlog_seg_size; + + /* Either prefetch didn`t cut it, or batch mode is disabled or + * the requested file is not WAL segment. + * Copy file from the archive directly. + * Retry several times before bailing out. + * + * TODO: + * files copied from archive directly are not validated, which is not ok. + * TOTHINK: + * Current WAL validation cannot be applied to partial files. 
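
// Illustrative sketch, not from the patch: the partial-segment extension that
// xlog_seg_size is saved for above and that get_wal_file() performs further
// down. When a ".partial"/".gz.partial" file is the source, the restored file
// is shorter than a full segment, so it is flushed and extended to
// XLOG_SEG_SIZE with ftruncate() before recovery reads it. The helper name is
// made up; a typical 16MB segment size is only an example, the patch takes the
// real value from pg_control.
#include <stdio.h>
#include <unistd.h>

static int
extend_to_segment_size(FILE *out, long xlog_seg_size)   /* e.g. 16 * 1024 * 1024 */
{
	if (fflush(out) != 0)
		return -1;
	if (ftruncate(fileno(out), (off_t) xlog_seg_size) != 0)
		return -1;
	return 0;
}
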
+ */ + + while (fail_count < 3) + { + if (get_wal_file(wal_file_name, backup_wal_file_path, absolute_wal_file_path, false)) + { + fail_count = 0; + elog(INFO, "pg_probackup archive-get copied WAL file %s", wal_file_name); + n_fetched++; + break; + } + else + fail_count++; + + elog(LOG, "Failed to get WAL file %s, retry %i/3", wal_file_name, fail_count); + } + + /* TODO/TOTHINK: + * If requested file is corrupted, we have no way to warn PostgreSQL about it. + * We either can: + * 1. feed to recovery and let PostgreSQL sort it out. Currently we do this. + * 2. error out. + * + * Also note, that we can detect corruption only if prefetch mode is used. + * TODO: if corruption or network problem encountered, kill yourself + * with SIGTERN to prevent recovery from starting up database. + */ + +get_done: + INSTR_TIME_SET_CURRENT(end_time); + INSTR_TIME_SUBTRACT(end_time, start_time); + get_time = INSTR_TIME_GET_DOUBLE(end_time); + pretty_time_interval(get_time, pretty_time_str, 20); + + if (fail_count == 0) + elog(INFO, "pg_probackup archive-get completed successfully, fetched: %i/%i, time elapsed: %s", + n_fetched, batch_size, pretty_time_str); + else + elog(ERROR, "pg_probackup archive-get failed to deliver WAL file: %s, time elapsed: %s", + wal_file_name, pretty_time_str); +} + +/* + * Copy batch_size of regular WAL segments into prefetch directory, + * starting with first_file. + * + * inclusive - should we copy first_file or not. + */ +uint32 run_wal_prefetch(const char *prefetch_dir, const char *archive_dir, + TimeLineID tli, XLogSegNo first_segno, int num_threads, + bool inclusive, int batch_size, uint32 wal_seg_size) +{ + int i; + XLogSegNo segno; + parray *batch_files = parray_new(); + int n_total_fetched = 0; + + if (!inclusive) + first_segno++; + + for (segno = first_segno; segno < (first_segno + batch_size); segno++) + { + WALSegno *xlogfile = palloc(sizeof(WALSegno)); + pg_atomic_init_flag(&xlogfile->lock); + + /* construct filename for WAL segment */ + GetXLogFileName(xlogfile->name, tli, segno, wal_seg_size); + + parray_append(batch_files, xlogfile); + + } + + /* copy segments */ + if (num_threads == 1) + { + for (i = 0; i < parray_num(batch_files); i++) + { + char to_fullpath[MAXPGPATH]; + char from_fullpath[MAXPGPATH]; + WALSegno *xlogfile = (WALSegno *) parray_get(batch_files, i); + + join_path_components(to_fullpath, prefetch_dir, xlogfile->name); + join_path_components(from_fullpath, archive_dir, xlogfile->name); + + /* It is ok, maybe requested batch is greater than the number of available + * files in the archive + */ + if (!get_wal_file(xlogfile->name, from_fullpath, to_fullpath, true)) + { + elog(LOG, "Thread [%d]: Failed to prefetch WAL segment %s", 0, xlogfile->name); + break; + } + + n_total_fetched++; + } + } + else + { + /* arrays with meta info for multi threaded archive-get */ + pthread_t *threads; + archive_get_arg *threads_args; + + /* init thread args */ + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (archive_get_arg *) palloc(sizeof(archive_get_arg) * num_threads); + + for (i = 0; i < num_threads; i++) + { + archive_get_arg *arg = &(threads_args[i]); + + arg->prefetch_dir = prefetch_dir; + arg->archive_dir = archive_dir; + + arg->thread_num = i+1; + arg->files = batch_files; + } + + /* Run threads */ + for (i = 0; i < num_threads; i++) + { + archive_get_arg *arg = &(threads_args[i]); + pthread_create(&threads[i], NULL, get_files, arg); + } + + /* Wait threads */ + for (i = 0; i < num_threads; i++) + { + 
pthread_join(threads[i], NULL); + n_total_fetched += threads_args[i].n_fetched; + } + } + /* TODO: free batch_files */ + return n_total_fetched; +} + +/* + * Copy files from archive catalog to pg_wal. + */ +static void * +get_files(void *arg) +{ + int i; + char to_fullpath[MAXPGPATH]; + char from_fullpath[MAXPGPATH]; + archive_get_arg *args = (archive_get_arg *) arg; + + my_thread_num = args->thread_num; + + for (i = 0; i < parray_num(args->files); i++) + { + WALSegno *xlogfile = (WALSegno *) parray_get(args->files, i); + + if (prefetch_stop) + break; + + if (!pg_atomic_test_set_flag(&xlogfile->lock)) + continue; + + join_path_components(from_fullpath, args->archive_dir, xlogfile->name); + join_path_components(to_fullpath, args->prefetch_dir, xlogfile->name); + + if (!get_wal_file(xlogfile->name, from_fullpath, to_fullpath, true)) + { + /* It is ok, maybe requested batch is greater than the number of available + * files in the archive + */ + elog(LOG, "Failed to prefetch WAL segment %s", xlogfile->name); + prefetch_stop = true; + break; + } + + args->n_fetched++; + } + + /* close ssh connection */ + fio_disconnect(); + + return NULL; +} + +/* + * Copy WAL segment from archive catalog to pgdata with possible decompression. + * When running in prefetch mode, we should not error out. + */ +bool +get_wal_file(const char *filename, const char *from_fullpath, + const char *to_fullpath, bool prefetch_mode) +{ + int rc = FILE_MISSING; + FILE *out; + char from_fullpath_gz[MAXPGPATH]; + bool src_partial = false; + + snprintf(from_fullpath_gz, sizeof(from_fullpath_gz), "%s.gz", from_fullpath); + + /* open destination file */ + out = fopen(to_fullpath, PG_BINARY_W); + if (!out) + { + elog(WARNING, "Failed to open file '%s': %s", + to_fullpath, strerror(errno)); + return false; + } + + if (chmod(to_fullpath, FILE_PERMISSION) == -1) + { + elog(WARNING, "Cannot change mode of file '%s': %s", + to_fullpath, strerror(errno)); + fclose(out); + unlink(to_fullpath); + return false; + } + + /* disable buffering for output file */ + setvbuf(out, NULL, _IONBF, BUFSIZ); + + /* In prefetch mode, we do look only for full WAL segments + * In non-prefetch mode, do look up '.partial' and '.gz.partial' + * segments. + */ + if (fio_is_remote(FIO_BACKUP_HOST)) + { + char *errmsg = NULL; + /* get file via ssh */ +#ifdef HAVE_LIBZ + /* If requested file is regular WAL segment, then try to open it with '.gz' suffix... */ + if (IsXLogFileName(filename)) + rc = fio_send_file_gz(from_fullpath_gz, to_fullpath, out, &errmsg); + if (rc == FILE_MISSING) +#endif + /* ... failing that, use uncompressed */ + rc = fio_send_file(from_fullpath, to_fullpath, out, NULL, &errmsg); + + /* When not in prefetch mode, try to use partial file */ + if (rc == FILE_MISSING && !prefetch_mode && IsXLogFileName(filename)) + { + char from_partial[MAXPGPATH]; + +#ifdef HAVE_LIBZ + /* '.gz.partial' goes first ... */ + snprintf(from_partial, sizeof(from_partial), "%s.gz.partial", from_fullpath); + rc = fio_send_file_gz(from_partial, to_fullpath, out, &errmsg); + if (rc == FILE_MISSING) +#endif + { + /* ... 
failing that, use '.partial' */ + snprintf(from_partial, sizeof(from_partial), "%s.partial", from_fullpath); + rc = fio_send_file(from_partial, to_fullpath, out, NULL, &errmsg); + } + + if (rc == SEND_OK) + src_partial = true; + } + + if (rc == WRITE_FAILED) + elog(WARNING, "Cannot write to file '%s': %s", + to_fullpath, strerror(errno)); + + if (errmsg) + elog(WARNING, "%s", errmsg); + + pg_free(errmsg); + } + else + { + /* get file locally */ +#ifdef HAVE_LIBZ + /* If requested file is regular WAL segment, then try to open it with '.gz' suffix... */ + if (IsXLogFileName(filename)) + rc = get_wal_file_internal(from_fullpath_gz, to_fullpath, out, true); + if (rc == FILE_MISSING) +#endif + /* ... failing that, use uncompressed */ + rc = get_wal_file_internal(from_fullpath, to_fullpath, out, false); + + /* When not in prefetch mode, try to use partial file */ + if (rc == FILE_MISSING && !prefetch_mode && IsXLogFileName(filename)) + { + char from_partial[MAXPGPATH]; + +#ifdef HAVE_LIBZ + /* '.gz.partial' goes first ... */ + snprintf(from_partial, sizeof(from_partial), "%s.gz.partial", from_fullpath); + rc = get_wal_file_internal(from_partial, to_fullpath, out, true); + if (rc == FILE_MISSING) +#endif + { + /* ... failing that, use '.partial' */ + snprintf(from_partial, sizeof(from_partial), "%s.partial", from_fullpath); + rc = get_wal_file_internal(from_partial, to_fullpath, out, false); + } + + if (rc == SEND_OK) + src_partial = true; + } + } + + if (!prefetch_mode && (rc == FILE_MISSING)) + elog(LOG, "Target WAL file is missing: %s", filename); + + if (rc < 0) + { + fclose(out); + unlink(to_fullpath); + return false; + } + + /* If partial file was used as source, then it is very likely that destination + * file is not equal to XLOG_SEG_SIZE - that is the way pg_receivexlog works. + * We must manually extent it up to XLOG_SEG_SIZE. + */ + if (src_partial) + { + + if (fflush(out) != 0) + { + elog(WARNING, "Cannot flush file \"%s\": %s", to_fullpath, strerror(errno)); + fclose(out); + unlink(to_fullpath); + return false; + } + + if (ftruncate(fileno(out), xlog_seg_size) != 0) + { + elog(WARNING, "Cannot extend file \"%s\": %s", to_fullpath, strerror(errno)); + fclose(out); + unlink(to_fullpath); + return false; + } + } + + if (fclose(out) != 0) + { + elog(WARNING, "Cannot close file '%s': %s", to_fullpath, strerror(errno)); + unlink(to_fullpath); + return false; + } + + elog(LOG, "WAL file successfully %s: %s", + prefetch_mode ? "prefetched" : "copied", filename); + return true; +} + +/* + * Copy WAL segment with possible decompression from local archive. + * Return codes: + * FILE_MISSING (-1) + * OPEN_FAILED (-2) + * READ_FAILED (-3) + * WRITE_FAILED (-4) + * ZLIB_ERROR (-5) + */ +int +get_wal_file_internal(const char *from_path, const char *to_path, FILE *out, + bool is_decompress) +{ +#ifdef HAVE_LIBZ + gzFile gz_in = NULL; +#endif + FILE *in = NULL; + char *buf = pgut_malloc(OUT_BUF_SIZE); /* 1MB buffer */ + int exit_code = 0; + + elog(VERBOSE, "Attempting to %s WAL file '%s'", + is_decompress ? 
"open compressed" : "open", from_path); + + /* open source file for read */ + if (!is_decompress) + { + in = fopen(from_path, PG_BINARY_R); + if (in == NULL) + { + if (errno == ENOENT) + exit_code = FILE_MISSING; + else + { + elog(WARNING, "Cannot open source WAL file \"%s\": %s", + from_path, strerror(errno)); + exit_code = OPEN_FAILED; + } + goto cleanup; + } + + /* disable stdio buffering */ + setvbuf(out, NULL, _IONBF, BUFSIZ); + } +#ifdef HAVE_LIBZ + else + { + gz_in = gzopen(from_path, PG_BINARY_R); + if (gz_in == NULL) + { + if (errno == ENOENT) + exit_code = FILE_MISSING; + else + { + elog(WARNING, "Cannot open compressed WAL file \"%s\": %s", + from_path, strerror(errno)); + exit_code = OPEN_FAILED; + } + + goto cleanup; + } + } +#endif + + /* copy content */ + for (;;) + { + int read_len = 0; + +#ifdef HAVE_LIBZ + if (is_decompress) + { + read_len = gzread(gz_in, buf, OUT_BUF_SIZE); + + if (read_len <= 0) + { + if (gzeof(gz_in)) + break; + else + { + elog(WARNING, "Cannot read compressed WAL file \"%s\": %s", + from_path, get_gz_error(gz_in, errno)); + exit_code = READ_FAILED; + break; + } + } + } + else +#endif + { + read_len = fread(buf, 1, OUT_BUF_SIZE, in); + + if (ferror(in)) + { + elog(WARNING, "Cannot read source WAL file \"%s\": %s", + from_path, strerror(errno)); + exit_code = READ_FAILED; + break; + } + + if (read_len == 0 && feof(in)) + break; + } + + if (read_len > 0) + { + if (fwrite(buf, 1, read_len, out) != read_len) + { + elog(WARNING, "Cannot write to WAL file '%s': %s", + to_path, strerror(errno)); + exit_code = WRITE_FAILED; + break; + } + } + } + +cleanup: +#ifdef HAVE_LIBZ + if (gz_in) + gzclose(gz_in); +#endif + if (in) + fclose(in); + + pg_free(buf); + return exit_code; +} + +bool next_wal_segment_exists(TimeLineID tli, XLogSegNo segno, const char *prefetch_dir, uint32 wal_seg_size) +{ + char next_wal_filename[MAXFNAMELEN]; + char next_wal_fullpath[MAXPGPATH]; + + GetXLogFileName(next_wal_filename, tli, segno + 1, wal_seg_size); + + join_path_components(next_wal_fullpath, prefetch_dir, next_wal_filename); + + if (access(next_wal_fullpath, F_OK) == 0) + return true; + + return false; +} + +/* Try to use content of prefetch directory to satisfy request for WAL segment + * If file is found, then validate it and rename. + * If requested file do not exists or validation has failed, then + * caller must copy WAL file directly from archive. + */ +bool wal_satisfy_from_prefetch(TimeLineID tli, XLogSegNo segno, const char *wal_file_name, + const char *prefetch_dir, const char *absolute_wal_file_path, + uint32 wal_seg_size, bool parse_wal) +{ + char prefetched_file[MAXPGPATH]; + + join_path_components(prefetched_file, prefetch_dir, wal_file_name); + + /* If prefetched file do not exists, then nothing can be done */ + if (access(prefetched_file, F_OK) != 0) + return false; + + /* If the next WAL segment do not exists in prefetch directory, + * then current segment cannot be validated, therefore cannot be used + * to satisfy recovery request. 
+ */ + if (parse_wal && !next_wal_segment_exists(tli, segno, prefetch_dir, wal_seg_size)) + return false; + + if (parse_wal && !validate_wal_segment(tli, segno, prefetch_dir, wal_seg_size)) + { + /* prefetched WAL segment is not looking good */ + elog(LOG, "Prefetched WAL segment %s is invalid, cannot use it", wal_file_name); + unlink(prefetched_file); + return false; + } + + /* file is available in prefetch directory */ + if (rename(prefetched_file, absolute_wal_file_path) == 0) + return true; + else + { + elog(WARNING, "Cannot rename file '%s' to '%s': %s", + prefetched_file, absolute_wal_file_path, strerror(errno)); + unlink(prefetched_file); + } + + return false; +} + +/* + * Maintain prefetch directory: drop redundant files + * Return number of files in prefetch directory. + */ +uint32 maintain_prefetch(const char *prefetch_dir, XLogSegNo first_segno, uint32 wal_seg_size) +{ + DIR *dir; + struct dirent *dir_ent; + uint32 n_files = 0; + + XLogSegNo segno; + TimeLineID tli; + + char fullpath[MAXPGPATH]; + + dir = opendir(prefetch_dir); + if (dir == NULL) + { + if (errno != ENOENT) + elog(WARNING, "Cannot open directory \"%s\": %s", prefetch_dir, strerror(errno)); + + return n_files; + } + + while ((dir_ent = readdir(dir))) + { + /* Skip entries point current dir or parent dir */ + if (strcmp(dir_ent->d_name, ".") == 0 || + strcmp(dir_ent->d_name, "..") == 0) + continue; + + if (IsXLogFileName(dir_ent->d_name)) + { + + GetXLogFromFileName(dir_ent->d_name, &tli, &segno, wal_seg_size); + + /* potentially useful segment, keep it */ + if (segno >= first_segno) + { + n_files++; + continue; + } + } + + join_path_components(fullpath, prefetch_dir, dir_ent->d_name); + unlink(fullpath); + } + + closedir(dir); + + return n_files; } diff --git a/src/backup.c b/src/backup.c index 9a4c70054..e2293fe4c 100644 --- a/src/backup.c +++ b/src/backup.c @@ -3,31 +3,27 @@ * backup.c: backup DB cluster, archived WAL * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ #include "pg_probackup.h" -#include -#include -#include -#include -#include -#include -#include -#include - +#if PG_VERSION_NUM < 110000 #include "catalog/catalog.h" +#endif #include "catalog/pg_tablespace.h" -#include "datapagemap.h" -#include "libpq/pqsignal.h" #include "pgtar.h" #include "receivelog.h" -#include "storage/bufpage.h" #include "streamutil.h" + +#include +#include +#include + #include "utils/thread.h" +#include "utils/file.h" static int standby_message_timeout = 10 * 1000; /* 10 sec = default */ static XLogRecPtr stop_backup_lsn = InvalidXLogRecPtr; @@ -35,7 +31,7 @@ static XLogRecPtr stop_stream_lsn = InvalidXLogRecPtr; /* * How long we should wait for streaming end in seconds. - * Retreived as checkpoint_timeout + checkpoint_timeout * 0.1 + * Retrieved as checkpoint_timeout + checkpoint_timeout * 0.1 */ static uint32 stream_stop_timeout = 0; /* Time in which we started to wait for streaming end */ @@ -62,25 +58,16 @@ typedef struct * 0 means there is no error, 1 - there is an error. 
*/ int ret; + + XLogRecPtr startpos; + TimeLineID starttli; } StreamThreadArg; static pthread_t stream_thread; static StreamThreadArg stream_thread_arg = {"", NULL, 1}; -static int is_ptrack_enable = false; -bool is_ptrack_support = false; -bool is_checksum_enabled = false; bool exclusive_backup = false; -/* Backup connections */ -static PGconn *backup_conn = NULL; -static PGconn *master_conn = NULL; -static PGconn *backup_conn_replication = NULL; - -/* PostgreSQL server version from "backup_conn" */ -static int server_version = 0; -static char server_version_str[100] = ""; - /* Is pg_start_backup() was executed */ static bool backup_in_progress = false; /* Is pg_stop_backup() was sent */ @@ -90,364 +77,51 @@ static bool pg_stop_backup_is_sent = false; * Backup routines */ static void backup_cleanup(bool fatal, void *userdata); -static void backup_disconnect(bool fatal, void *userdata); static void *backup_files(void *arg); -static void *remote_backup_files(void *arg); -static void do_backup_instance(void); +static void do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool backup_logs); -static void pg_start_backup(const char *label, bool smooth, pgBackup *backup); +static void pg_start_backup(const char *label, bool smooth, pgBackup *backup, + PGNodeInfo *nodeInfo, PGconn *conn); static void pg_switch_wal(PGconn *conn); -static void pg_stop_backup(pgBackup *backup); -static int checkpoint_timeout(void); - -//static void backup_list_file(parray *files, const char *root, ) -static void parse_backup_filelist_filenames(parray *files, const char *root); -static void wait_wal_lsn(XLogRecPtr lsn, bool wait_prev_segment); -static void wait_replica_wal_lsn(XLogRecPtr lsn, bool is_start_backup); -static void make_pagemap_from_ptrack(parray *files); -static void *StreamLog(void *arg); - -static void get_remote_pgdata_filelist(parray *files); -static void ReceiveFileList(parray* files, PGconn *conn, PGresult *res, int rownum); -static void remote_copy_file(PGconn *conn, pgFile* file); - -/* Ptrack functions */ -static void pg_ptrack_clear(void); -static bool pg_ptrack_support(void); -static bool pg_ptrack_enable(void); -static bool pg_checksum_enable(void); -static bool pg_is_in_recovery(void); -static bool pg_ptrack_get_and_clear_db(Oid dbOid, Oid tblspcOid); -static char *pg_ptrack_get_and_clear(Oid tablespace_oid, - Oid db_oid, - Oid rel_oid, - size_t *result_size); -static XLogRecPtr get_last_ptrack_lsn(void); - -/* Check functions */ -static void check_server_version(void); -static void check_system_identifiers(void); -static void confirm_block_size(const char *name, int blcksz); -static void set_cfs_datafiles(parray *files, const char *root, char *relative, size_t i); +static void pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, PGNodeInfo *nodeInfo); +static int checkpoint_timeout(PGconn *backup_conn); -#define disconnect_and_exit(code) \ - { \ - if (conn != NULL) PQfinish(conn); \ - exit(code); \ - } +static XLogRecPtr wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli, + bool in_prev_segment, bool segment_only, + int timeout_elevel, bool in_stream_dir); -/* Fill "files" with data about all the files to backup */ -static void -get_remote_pgdata_filelist(parray *files) -{ - PGresult *res; - int resultStatus; - int i; - - backup_conn_replication = pgut_connect_replication(pgut_dbname); - - if (PQsendQuery(backup_conn_replication, "FILE_BACKUP FILELIST") == 0) - elog(ERROR,"%s: could not send replication command \"%s\": %s", - PROGRAM_NAME, 
"FILE_BACKUP", PQerrorMessage(backup_conn_replication)); - - res = PQgetResult(backup_conn_replication); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - resultStatus = PQresultStatus(res); - PQclear(res); - elog(ERROR, "cannot start getting FILE_BACKUP filelist: %s, result_status %d", - PQerrorMessage(backup_conn_replication), resultStatus); - } - - if (PQntuples(res) < 1) - elog(ERROR, "%s: no data returned from server", PROGRAM_NAME); - - for (i = 0; i < PQntuples(res); i++) - { - ReceiveFileList(files, backup_conn_replication, res, i); - } - - res = PQgetResult(backup_conn_replication); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - { - elog(ERROR, "%s: final receive failed: %s", - PROGRAM_NAME, PQerrorMessage(backup_conn_replication)); - } - - PQfinish(backup_conn_replication); -} - -/* - * workhorse for get_remote_pgdata_filelist(). - * Parse received message into pgFile structure. - */ -static void -ReceiveFileList(parray* files, PGconn *conn, PGresult *res, int rownum) -{ - char filename[MAXPGPATH]; - pgoff_t current_len_left = 0; - bool basetablespace; - char *copybuf = NULL; - pgFile *pgfile; - - /* What for do we need this basetablespace field?? */ - basetablespace = PQgetisnull(res, rownum, 0); - if (basetablespace) - elog(LOG,"basetablespace"); - else - elog(LOG, "basetablespace %s", PQgetvalue(res, rownum, 1)); - - res = PQgetResult(conn); - - if (PQresultStatus(res) != PGRES_COPY_OUT) - elog(ERROR, "Could not get COPY data stream: %s", PQerrorMessage(conn)); - - while (1) - { - int r; - int filemode; - - if (copybuf != NULL) - { - PQfreemem(copybuf); - copybuf = NULL; - } - - r = PQgetCopyData(conn, ©buf, 0); - - if (r == -2) - elog(ERROR, "Could not read COPY data: %s", PQerrorMessage(conn)); - - /* end of copy */ - if (r == -1) - break; - - /* This must be the header for a new file */ - if (r != 512) - elog(ERROR, "Invalid tar block header size: %d\n", r); - - current_len_left = read_tar_number(©buf[124], 12); - - /* Set permissions on the file */ - filemode = read_tar_number(©buf[100], 8); - - /* First part of header is zero terminated filename */ - snprintf(filename, sizeof(filename), "%s", copybuf); - - pgfile = pgFileInit(filename); - pgfile->size = current_len_left; - pgfile->mode |= filemode; +static void *StreamLog(void *arg); +static void IdentifySystem(StreamThreadArg *stream_thread_arg); - if (filename[strlen(filename) - 1] == '/') - { - /* Symbolic link or directory has size zero */ - Assert (pgfile->size == 0); - /* Ends in a slash means directory or symlink to directory */ - if (copybuf[156] == '5') - { - /* Directory */ - pgfile->mode |= S_IFDIR; - } - else if (copybuf[156] == '2') - { - /* Symlink */ -#ifndef WIN32 - pgfile->mode |= S_IFLNK; -#else - pgfile->mode |= S_IFDIR; -#endif - } - else - elog(ERROR, "Unrecognized link indicator \"%c\"\n", - copybuf[156]); - } - else - { - /* regular file */ - pgfile->mode |= S_IFREG; - } +static void check_external_for_tablespaces(parray *external_list, + PGconn *backup_conn); +static parray *get_database_map(PGconn *pg_startbackup_conn); - parray_append(files, pgfile); - } +/* pgpro specific functions */ +static bool pgpro_support(PGconn *conn); - if (copybuf != NULL) - PQfreemem(copybuf); -} +/* Check functions */ +static bool pg_checksum_enable(PGconn *conn); +static bool pg_is_in_recovery(PGconn *conn); +static bool pg_is_superuser(PGconn *conn); +static void check_server_version(PGconn *conn, PGNodeInfo *nodeInfo); +static void confirm_block_size(PGconn *conn, const char *name, int blcksz); +static void 
set_cfs_datafiles(parray *files, const char *root, char *relative, size_t i); -/* read one file via replication protocol - * and write it to the destination subdir in 'backup_path' */ static void -remote_copy_file(PGconn *conn, pgFile* file) +backup_stopbackup_callback(bool fatal, void *userdata) { - PGresult *res; - char *copybuf = NULL; - char buf[32768]; - FILE *out; - char database_path[MAXPGPATH]; - char to_path[MAXPGPATH]; - bool skip_padding = false; - - pgBackupGetPath(¤t, database_path, lengthof(database_path), - DATABASE_DIR); - join_path_components(to_path, database_path, file->path); - - out = fopen(to_path, PG_BINARY_W); - if (out == NULL) - { - int errno_tmp = errno; - elog(ERROR, "cannot open destination file \"%s\": %s", - to_path, strerror(errno_tmp)); - } - - INIT_CRC32C(file->crc); - - /* read from stream and write to backup file */ - while (1) - { - int row_length; - int errno_tmp; - int write_buffer_size = 0; - if (copybuf != NULL) - { - PQfreemem(copybuf); - copybuf = NULL; - } - - row_length = PQgetCopyData(conn, ©buf, 0); - - if (row_length == -2) - elog(ERROR, "Could not read COPY data: %s", PQerrorMessage(conn)); - - if (row_length == -1) - break; - - if (!skip_padding) - { - write_buffer_size = Min(row_length, sizeof(buf)); - memcpy(buf, copybuf, write_buffer_size); - COMP_CRC32C(file->crc, buf, write_buffer_size); - - /* TODO calc checksum*/ - if (fwrite(buf, 1, write_buffer_size, out) != write_buffer_size) - { - errno_tmp = errno; - /* oops */ - FIN_CRC32C(file->crc); - fclose(out); - PQfinish(conn); - elog(ERROR, "cannot write to \"%s\": %s", to_path, - strerror(errno_tmp)); - } - - file->read_size += write_buffer_size; - } - if (file->read_size >= file->size) - { - skip_padding = true; - } - } - - res = PQgetResult(conn); - - /* File is not found. That's normal. */ - if (PQresultStatus(res) != PGRES_COMMAND_OK) - { - elog(ERROR, "final receive failed: status %d ; %s",PQresultStatus(res), PQerrorMessage(conn)); - } - - file->write_size = (int64) file->read_size; - FIN_CRC32C(file->crc); - - fclose(out); -} - -/* - * Take a remote backup of the PGDATA at a file level. - * Copy all directories and files listed in backup_files_list. - */ -static void * -remote_backup_files(void *arg) -{ - int i; - backup_files_arg *arguments = (backup_files_arg *) arg; - int n_backup_files_list = parray_num(arguments->files_list); - PGconn *file_backup_conn = NULL; - - for (i = 0; i < n_backup_files_list; i++) + PGconn *pg_startbackup_conn = (PGconn *) userdata; + /* + * If backup is in progress, notify stop of backup to PostgreSQL + */ + if (backup_in_progress) { - char *query_str; - PGresult *res; - char *copybuf = NULL; - pgFile *file; - int row_length; - - file = (pgFile *) parray_get(arguments->files_list, i); - - /* We have already copied all directories */ - if (S_ISDIR(file->mode)) - continue; - - if (!pg_atomic_test_set_flag(&file->lock)) - continue; - - file_backup_conn = pgut_connect_replication(pgut_dbname); - - /* check for interrupt */ - if (interrupted) - elog(ERROR, "interrupted during backup"); - - query_str = psprintf("FILE_BACKUP FILEPATH '%s'",file->path); - - if (PQsendQuery(file_backup_conn, query_str) == 0) - elog(ERROR,"%s: could not send replication command \"%s\": %s", - PROGRAM_NAME, query_str, PQerrorMessage(file_backup_conn)); - - res = PQgetResult(file_backup_conn); - - /* File is not found. That's normal. 
*/ - if (PQresultStatus(res) == PGRES_COMMAND_OK) - { - PQclear(res); - PQfinish(file_backup_conn); - continue; - } - - if (PQresultStatus(res) != PGRES_COPY_OUT) - { - PQclear(res); - PQfinish(file_backup_conn); - elog(ERROR, "Could not get COPY data stream: %s", PQerrorMessage(file_backup_conn)); - } - - /* read the header of the file */ - row_length = PQgetCopyData(file_backup_conn, ©buf, 0); - - if (row_length == -2) - elog(ERROR, "Could not read COPY data: %s", PQerrorMessage(file_backup_conn)); - - /* end of copy TODO handle it */ - if (row_length == -1) - elog(ERROR, "Unexpected end of COPY data"); - - if(row_length != 512) - elog(ERROR, "Invalid tar block header size: %d\n", row_length); - file->size = read_tar_number(©buf[124], 12); - - /* receive the data from stream and write to backup file */ - remote_copy_file(file_backup_conn, file); - - elog(VERBOSE, "File \"%s\". Copied " INT64_FORMAT " bytes", - file->path, file->write_size); - PQfinish(file_backup_conn); + elog(WARNING, "backup in progress, stop backup"); + pg_stop_backup(NULL, pg_startbackup_conn, NULL); /* don't care about stop_lsn in case of error */ } - - /* Data files transferring is successful */ - arguments->ret = 0; - - return NULL; } /* @@ -455,10 +129,11 @@ remote_backup_files(void *arg) * Move files from 'pgdata' to a subdirectory in 'backup_path'. */ static void -do_backup_instance(void) +do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool backup_logs) { int i; char database_path[MAXPGPATH]; + char external_prefix[MAXPGPATH]; /* Temp value. Used as template */ char dst_backup_path[MAXPGPATH]; char label[1024]; XLogRecPtr prev_backup_start_lsn = InvalidXLogRecPtr; @@ -470,36 +145,53 @@ do_backup_instance(void) pgBackup *prev_backup = NULL; parray *prev_backup_filelist = NULL; + parray *backup_list = NULL; + parray *external_dirs = NULL; + parray *database_map = NULL; - elog(LOG, "Database backup start"); + /* used for multitimeline incremental backup */ + parray *tli_list = NULL; - /* Initialize size summary */ - current.data_bytes = 0; + /* for fancy reporting */ + time_t start_time, end_time; + char pretty_time[20]; + char pretty_bytes[20]; - /* Obtain current timeline */ - if (is_remote_backup) + elog(LOG, "Database backup start"); + if(current.external_dir_str) { - char *sysidentifier; - TimeLineID starttli; - XLogRecPtr startpos; + external_dirs = make_external_directory_list(current.external_dir_str, + false); + check_external_for_tablespaces(external_dirs, backup_conn); + } - backup_conn_replication = pgut_connect_replication(pgut_dbname); + /* Clear ptrack files for not PTRACK backups */ + if (current.backup_mode != BACKUP_MODE_DIFF_PTRACK && nodeInfo->is_ptrack_enable) + pg_ptrack_clear(backup_conn, nodeInfo->ptrack_version_num); - /* Check replication prorocol connection */ - if (!RunIdentifySystem(backup_conn_replication, &sysidentifier, &starttli, &startpos, NULL)) - elog(ERROR, "Failed to send command for remote backup"); + /* notify start of backup to PostgreSQL server */ + time2iso(label, lengthof(label), current.start_time); + strncat(label, " with pg_probackup", lengthof(label) - + strlen(" with pg_probackup")); -// TODO implement the check -// if (&sysidentifier != system_identifier) -// elog(ERROR, "Backup data directory was initialized for system id %ld, but target backup directory system id is %ld", -// system_identifier, sysidentifier); + /* Call pg_start_backup function in PostgreSQL connect */ + pg_start_backup(label, smooth_checkpoint, ¤t, nodeInfo, 
backup_conn); - current.tli = starttli; + /* Obtain current timeline */ +#if PG_VERSION_NUM >= 90600 + current.tli = get_current_timeline(backup_conn); +#else + current.tli = get_current_timeline_from_control(false); +#endif - PQfinish(backup_conn_replication); - } - else - current.tli = get_current_timeline(false); + /* In PAGE mode or in ARCHIVE wal-mode wait for current segment */ + if (current.backup_mode == BACKUP_MODE_DIFF_PAGE ||!stream_wal) + /* + * Do not wait start_lsn for stream backup. + * Because WAL streaming will start after pg_start_backup() in stream + * mode. + */ + wait_wal_lsn(current.start_lsn, true, current.tli, false, true, ERROR, false); /* * In incremental backup mode ensure that already-validated @@ -509,97 +201,125 @@ do_backup_instance(void) current.backup_mode == BACKUP_MODE_DIFF_PTRACK || current.backup_mode == BACKUP_MODE_DIFF_DELTA) { - parray *backup_list; - char prev_backup_filelist_path[MAXPGPATH]; - /* get list of backups already taken */ - backup_list = catalog_get_backup_list(INVALID_BACKUP_ID); + backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); - prev_backup = catalog_get_last_data_backup(backup_list, current.tli); + prev_backup = catalog_get_last_data_backup(backup_list, current.tli, current.start_time); if (prev_backup == NULL) - elog(ERROR, "Valid backup on current timeline is not found. " - "Create new FULL backup before an incremental one."); - parray_free(backup_list); + { + /* try to setup multi-timeline backup chain */ + elog(WARNING, "Valid backup on current timeline %u is not found, " + "trying to look up on previous timelines", + current.tli); + + /* TODO: use read_timeline_history */ + tli_list = catalog_get_timelines(&instance_config); + + if (parray_num(tli_list) == 0) + elog(WARNING, "Cannot find valid backup on previous timelines, " + "WAL archive is not available"); + else + { + prev_backup = get_multi_timeline_parent(backup_list, tli_list, current.tli, + current.start_time, &instance_config); + + if (prev_backup == NULL) + elog(WARNING, "Cannot find valid backup on previous timelines"); + } + + /* failed to find suitable parent, error out */ + if (!prev_backup) + elog(ERROR, "Create new full backup before an incremental one"); + } + } + + if (prev_backup) + { + if (parse_program_version(prev_backup->program_version) > parse_program_version(PROGRAM_VERSION)) + elog(ERROR, "pg_probackup binary version is %s, but backup %s version is %s. " + "pg_probackup do not guarantee to be forward compatible. " + "Please upgrade pg_probackup binary.", + PROGRAM_VERSION, base36enc(prev_backup->start_time), prev_backup->program_version); + + elog(INFO, "Parent backup: %s", base36enc(prev_backup->start_time)); - pgBackupGetPath(prev_backup, prev_backup_filelist_path, - lengthof(prev_backup_filelist_path), DATABASE_FILE_LIST); /* Files of previous backup needed by DELTA backup */ - prev_backup_filelist = dir_read_file_list(NULL, prev_backup_filelist_path); + prev_backup_filelist = get_backup_filelist(prev_backup, true); /* If lsn is not NULL, only pages with higher lsn will be copied. */ prev_backup_start_lsn = prev_backup->start_lsn; current.parent_backup = prev_backup->start_time; - pgBackupWriteBackupControlFile(¤t); + write_backup(¤t, true); } /* - * It`s illegal to take PTRACK backup if LSN from ptrack_control() is not equal to - * stort_backup LSN of previous backup + * It`s illegal to take PTRACK backup if LSN from ptrack_control() is not + * equal to start_lsn of previous backup. 
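+	 * (A mismatch typically means that the ptrack map was cleared or
+	 * reinitialized after the parent backup started, so it would no longer
+	 * cover every page changed since that backup.)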
*/ if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK) { - XLogRecPtr ptrack_lsn = get_last_ptrack_lsn(); + XLogRecPtr ptrack_lsn = get_last_ptrack_lsn(backup_conn, nodeInfo); - if (ptrack_lsn > prev_backup->stop_lsn || ptrack_lsn == InvalidXLogRecPtr) + if (ptrack_lsn > prev_backup->start_lsn || ptrack_lsn == InvalidXLogRecPtr) { - elog(ERROR, "LSN from ptrack_control %lx differs from STOP LSN of previous backup %lx.\n" + elog(ERROR, "LSN from ptrack_control %X/%X differs from Start LSN of previous backup %X/%X.\n" "Create new full backup before an incremental one.", - ptrack_lsn, prev_backup->stop_lsn); + (uint32) (ptrack_lsn >> 32), (uint32) (ptrack_lsn), + (uint32) (prev_backup->start_lsn >> 32), + (uint32) (prev_backup->start_lsn)); } } - /* Clear ptrack files for FULL and PAGE backup */ - if (current.backup_mode != BACKUP_MODE_DIFF_PTRACK && is_ptrack_enable) - pg_ptrack_clear(); + /* For incremental backup check that start_lsn is not from the past + * Though it will not save us if PostgreSQL instance is actually + * restored STREAM backup. + */ + if (current.backup_mode != BACKUP_MODE_FULL && + prev_backup->start_lsn > current.start_lsn) + elog(ERROR, "Current START LSN %X/%X is lower than START LSN %X/%X of previous backup %s. " + "It may indicate that we are trying to backup PostgreSQL instance from the past.", + (uint32) (current.start_lsn >> 32), (uint32) (current.start_lsn), + (uint32) (prev_backup->start_lsn >> 32), (uint32) (prev_backup->start_lsn), + base36enc(prev_backup->start_time)); - /* notify start of backup to PostgreSQL server */ - time2iso(label, lengthof(label), current.start_time); - strncat(label, " with pg_probackup", lengthof(label) - - strlen(" with pg_probackup")); - pg_start_backup(label, smooth_checkpoint, ¤t); + /* Update running backup meta with START LSN */ + write_backup(¤t, true); pgBackupGetPath(¤t, database_path, lengthof(database_path), DATABASE_DIR); + pgBackupGetPath(¤t, external_prefix, lengthof(external_prefix), + EXTERNAL_DIR); /* start stream replication */ if (stream_wal) { + /* How long we should wait for streaming end after pg_stop_backup */ + stream_stop_timeout = checkpoint_timeout(backup_conn); + stream_stop_timeout = stream_stop_timeout + stream_stop_timeout * 0.1; + join_path_components(dst_backup_path, database_path, PG_XLOG_DIR); - dir_create_dir(dst_backup_path, DIR_PERMISSION); + fio_mkdir(dst_backup_path, DIR_PERMISSION, FIO_BACKUP_HOST); stream_thread_arg.basedir = dst_backup_path; /* * Connect in replication mode to the server. */ - stream_thread_arg.conn = pgut_connect_replication(pgut_dbname); - - if (!CheckServerVersionForStreaming(stream_thread_arg.conn)) - { - PQfinish(stream_thread_arg.conn); - /* - * Error message already written in CheckServerVersionForStreaming(). - * There's no hope of recovering from a version mismatch, so don't - * retry. - */ - elog(ERROR, "Cannot continue backup because stream connect has failed."); - } - - /* - * Identify server, obtaining start LSN position and current timeline ID - * at the same time, necessary if not valid data can be found in the - * existing output directory. 
- */ - if (!RunIdentifySystem(stream_thread_arg.conn, NULL, NULL, NULL, NULL)) - { - PQfinish(stream_thread_arg.conn); - elog(ERROR, "Cannot continue backup because stream connect has failed."); - } + stream_thread_arg.conn = pgut_connect_replication(instance_config.conn_opt.pghost, + instance_config.conn_opt.pgport, + instance_config.conn_opt.pgdatabase, + instance_config.conn_opt.pguser); + /* sanity */ + IdentifySystem(&stream_thread_arg); /* By default there are some error */ stream_thread_arg.ret = 1; + /* we must use startpos as start_lsn from start_backup */ + stream_thread_arg.startpos = current.start_lsn; + stream_thread_arg.starttli = current.tli; + thread_interrupted = false; pthread_create(&stream_thread, NULL, StreamLog, &stream_thread_arg); } @@ -607,10 +327,68 @@ do_backup_instance(void) backup_files_list = parray_new(); /* list files with the logical path. omit $PGDATA */ - if (is_remote_backup) - get_remote_pgdata_filelist(backup_files_list); + if (fio_is_remote(FIO_DB_HOST)) + fio_list_dir(backup_files_list, instance_config.pgdata, + true, true, false, backup_logs, true, 0); else - dir_list_file(backup_files_list, pgdata, true, true, false); + dir_list_file(backup_files_list, instance_config.pgdata, + true, true, false, backup_logs, true, 0, FIO_LOCAL_HOST); + + /* + * Get database_map (name to oid) for use in partial restore feature. + * It's possible that we fail and database_map will be NULL. + */ + database_map = get_database_map(backup_conn); + + /* + * Append to backup list all files and directories + * from external directory option + */ + if (external_dirs) + { + for (i = 0; i < parray_num(external_dirs); i++) + { + /* External dirs numeration starts with 1. + * 0 value is not external dir */ + if (fio_is_remote(FIO_DB_HOST)) + fio_list_dir(backup_files_list, parray_get(external_dirs, i), + false, true, false, false, true, i+1); + else + dir_list_file(backup_files_list, parray_get(external_dirs, i), + false, true, false, false, true, i+1, FIO_LOCAL_HOST); + } + } + + /* close ssh session in main thread */ + fio_disconnect(); + + /* Sanity check for backup_files_list, thank you, Windows: + * https://github.com/postgrespro/pg_probackup/issues/48 + */ + + if (parray_num(backup_files_list) < 100) + elog(ERROR, "PGDATA is almost empty. Either it was concurrently deleted or " + "pg_probackup do not possess sufficient permissions to list PGDATA content"); + + /* Calculate pgdata_bytes */ + for (i = 0; i < parray_num(backup_files_list); i++) + { + pgFile *file = (pgFile *) parray_get(backup_files_list, i); + + if (file->external_dir_num != 0) + continue; + + if (S_ISDIR(file->mode)) + { + current.pgdata_bytes += 4096; + continue; + } + + current.pgdata_bytes += file->size; + } + + pretty_size(current.pgdata_bytes, pretty_bytes, lengthof(pretty_bytes)); + elog(INFO, "PGDATA size: %s", pretty_bytes); /* * Sort pathname ascending. It is necessary to create intermediate @@ -620,47 +398,71 @@ do_backup_instance(void) * 1 - create 'base' * 2 - create 'base/1' * - * Sorted array is used at least in parse_backup_filelist_filenames(), + * Sorted array is used at least in parse_filelist_filenames(), * extractPageMap(), make_pagemap_from_ptrack(). 
*/ - parray_qsort(backup_files_list, pgFileComparePath); + parray_qsort(backup_files_list, pgFileCompareRelPathWithExternal); /* Extract information about files in backup_list parsing their names:*/ - parse_backup_filelist_filenames(backup_files_list, pgdata); + parse_filelist_filenames(backup_files_list, instance_config.pgdata); if (current.backup_mode != BACKUP_MODE_FULL) { - elog(LOG, "current_tli:%X", current.tli); - elog(LOG, "prev_backup->start_lsn: %X/%X", + elog(LOG, "Current tli: %X", current.tli); + elog(LOG, "Parent start_lsn: %X/%X", (uint32) (prev_backup->start_lsn >> 32), (uint32) (prev_backup->start_lsn)); - elog(LOG, "current.start_lsn: %X/%X", + elog(LOG, "start_lsn: %X/%X", (uint32) (current.start_lsn >> 32), (uint32) (current.start_lsn)); } /* * Build page mapping in incremental mode. */ - if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) - { - /* - * Build the page map. Obtain information about changed pages - * reading WAL segments present in archives up to the point - * where this backup has started. - */ - extractPageMap(arclog_path, prev_backup->start_lsn, current.tli, - current.start_lsn, - /* - * For backup from master wait for previous segment. - * For backup from replica wait for current segment. - */ - !current.from_replica, backup_files_list); - } - else if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK) + + if (current.backup_mode == BACKUP_MODE_DIFF_PAGE || + current.backup_mode == BACKUP_MODE_DIFF_PTRACK) { - /* - * Build the page map from ptrack information. - */ - make_pagemap_from_ptrack(backup_files_list); + bool pagemap_isok = true; + + time(&start_time); + elog(INFO, "Extracting pagemap of changed blocks"); + + if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) + { + /* + * Build the page map. Obtain information about changed pages + * reading WAL segments present in archives up to the point + * where this backup has started. + */ + pagemap_isok = extractPageMap(arclog_path, instance_config.xlog_seg_size, + prev_backup->start_lsn, prev_backup->tli, + current.start_lsn, current.tli, tli_list); + } + else if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK) + { + /* + * Build the page map from ptrack information. 
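+			 * The result is a per-file map of changed blocks, which lets the
+			 * backup copy only modified pages. ptrack 2.x and the legacy
+			 * 1.5-1.7 API are queried differently, hence the dispatch on
+			 * ptrack_version_num below.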
+ */ + if (nodeInfo->ptrack_version_num >= 20) + make_pagemap_from_ptrack_2(backup_files_list, backup_conn, + nodeInfo->ptrack_schema, + nodeInfo->ptrack_version_num, + prev_backup_start_lsn); + else if (nodeInfo->ptrack_version_num == 15 || + nodeInfo->ptrack_version_num == 16 || + nodeInfo->ptrack_version_num == 17) + make_pagemap_from_ptrack_1(backup_files_list, backup_conn); + } + + time(&end_time); + + /* TODO: add ms precision */ + if (pagemap_isok) + elog(INFO, "Pagemap successfully extracted, time elapsed: %.0f sec", + difftime(end_time, start_time)); + else + elog(ERROR, "Pagemap extraction failed, time elasped: %.0f sec", + difftime(end_time, start_time)); } /* @@ -674,20 +476,19 @@ do_backup_instance(void) if (S_ISDIR(file->mode)) { char dirpath[MAXPGPATH]; - char *dir_name; - char database_path[MAXPGPATH]; - if (!is_remote_backup) - dir_name = GetRelativePath(file->path, pgdata); + if (file->external_dir_num) + { + char temp[MAXPGPATH]; + snprintf(temp, MAXPGPATH, "%s%d", external_prefix, + file->external_dir_num); + join_path_components(dirpath, temp, file->rel_path); + } else - dir_name = file->path; - - elog(VERBOSE, "Create directory \"%s\"", dir_name); - pgBackupGetPath(¤t, database_path, lengthof(database_path), - DATABASE_DIR); + join_path_components(dirpath, database_path, file->rel_path); - join_path_components(dirpath, database_path, dir_name); - dir_create_dir(dirpath, DIR_PERMISSION); + elog(VERBOSE, "Create directory '%s'", dirpath); + fio_mkdir(dirpath, DIR_PERMISSION, FIO_BACKUP_HOST); } /* setup threads */ @@ -698,7 +499,15 @@ do_backup_instance(void) parray_qsort(backup_files_list, pgFileCompareSize); /* Sort the array for binary search */ if (prev_backup_filelist) - parray_qsort(prev_backup_filelist, pgFileComparePath); + parray_qsort(prev_backup_filelist, pgFileCompareRelPathWithExternal); + + /* write initial backup_content.control file and update backup.control */ + write_backup_filelist(¤t, backup_files_list, + instance_config.pgdata, external_dirs, true); + write_backup(¤t, true); + + /* Init backup page header map */ + init_header_map(¤t); /* init thread args with own file lists */ threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); @@ -708,29 +517,32 @@ do_backup_instance(void) { backup_files_arg *arg = &(threads_args[i]); - arg->from_root = pgdata; + arg->nodeInfo = nodeInfo; + arg->from_root = instance_config.pgdata; arg->to_root = database_path; + arg->external_prefix = external_prefix; + arg->external_dirs = external_dirs; arg->files_list = backup_files_list; arg->prev_filelist = prev_backup_filelist; arg->prev_start_lsn = prev_backup_start_lsn; - arg->backup_conn = NULL; - arg->cancel_conn = NULL; + arg->conn_arg.conn = NULL; + arg->conn_arg.cancel_conn = NULL; + arg->hdr_map = &(current.hdr_map); + arg->thread_num = i+1; /* By default there are some error */ arg->ret = 1; } /* Run threads */ - elog(LOG, "Start transfering data files"); + thread_interrupted = false; + elog(INFO, "Start transferring data files"); + time(&start_time); for (i = 0; i < num_threads; i++) { backup_files_arg *arg = &(threads_args[i]); elog(VERBOSE, "Start thread num: %i", i); - - if (!is_remote_backup) - pthread_create(&threads[i], NULL, backup_files, arg); - else - pthread_create(&threads[i], NULL, remote_backup_files, arg); + pthread_create(&threads[i], NULL, backup_files, arg); } /* Wait threads */ @@ -740,10 +552,16 @@ do_backup_instance(void) if (threads_args[i].ret == 1) backup_isok = false; } + + time(&end_time); + 
pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); if (backup_isok) - elog(LOG, "Data files are transfered"); + elog(INFO, "Data files are transferred, time elapsed: %s", + pretty_time); else - elog(ERROR, "Data files transferring failed"); + elog(ERROR, "Data files transferring failed, time elapsed: %s", + pretty_time); /* clean previous backup file list */ if (prev_backup_filelist) @@ -753,33 +571,86 @@ do_backup_instance(void) } /* Notify end of backup */ - pg_stop_backup(¤t); + pg_stop_backup(¤t, backup_conn, nodeInfo); + + /* In case of backup from replica >= 9.6 we must fix minRecPoint, + * First we must find pg_control in backup_files_list. + */ + if (current.from_replica && !exclusive_backup) + { + pgFile *pg_control = NULL; + + for (i = 0; i < parray_num(backup_files_list); i++) + { + pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i); + + if (tmp_file->external_dir_num == 0 && + (strcmp(tmp_file->rel_path, XLOG_CONTROL_FILE) == 0)) + { + pg_control = tmp_file; + break; + } + } + + if (!pg_control) + elog(ERROR, "Failed to find file \"%s\" in backup filelist.", + XLOG_CONTROL_FILE); + + set_min_recovery_point(pg_control, database_path, current.stop_lsn); + } + + /* close and sync page header map */ + if (current.hdr_map.fp) + { + cleanup_header_map(&(current.hdr_map)); + + if (fio_sync(current.hdr_map.path, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Cannot sync file \"%s\": %s", current.hdr_map.path, strerror(errno)); + } + + /* close ssh session in main thread */ + fio_disconnect(); /* Add archived xlog files into the list of files of this backup */ if (stream_wal) { - parray *xlog_files_list; + parray *xlog_files_list; char pg_xlog_path[MAXPGPATH]; + char wal_full_path[MAXPGPATH]; /* Scan backup PG_XLOG_DIR */ xlog_files_list = parray_new(); join_path_components(pg_xlog_path, database_path, PG_XLOG_DIR); - dir_list_file(xlog_files_list, pg_xlog_path, false, true, false); + dir_list_file(xlog_files_list, pg_xlog_path, false, true, false, false, true, 0, + FIO_BACKUP_HOST); + /* TODO: Drop streamed WAL segments greater than stop_lsn */ for (i = 0; i < parray_num(xlog_files_list); i++) { pgFile *file = (pgFile *) parray_get(xlog_files_list, i); - if (S_ISREG(file->mode)) - calc_file_checksum(file); - /* Remove file path root prefix*/ - if (strstr(file->path, database_path) == file->path) - { - char *ptr = file->path; + join_path_components(wal_full_path, pg_xlog_path, file->rel_path); - file->path = pstrdup(GetRelativePath(ptr, database_path)); - free(ptr); - } + if (!S_ISREG(file->mode)) + continue; + + file->crc = pgFileGetCRC(wal_full_path, true, false); + file->write_size = file->size; + + /* overwrite rel_path, because now it is relative to + * /backup_dir/backups/instance_name/backup_id/database/pg_xlog/ + */ + pg_free(file->rel_path); + + /* Now it is relative to /backup_dir/backups/instance_name/backup_id/database/ */ + file->rel_path = pgut_strdup(GetRelativePath(wal_full_path, database_path)); + + file->name = last_dir_separator(file->rel_path); + + if (file->name == NULL) // TODO: do it in pgFileInit + file->name = file->rel_path; + else + file->name++; } /* Add xlog files into the list of backed up files */ @@ -787,20 +658,87 @@ do_backup_instance(void) parray_free(xlog_files_list); } - /* Print the list of files to backup catalog */ - pgBackupWriteFileList(¤t, backup_files_list, pgdata); + /* write database map to file and add it to control file */ + if (database_map) + { + write_database_map(¤t, database_map, 
backup_files_list); + /* cleanup */ + parray_walk(database_map, db_map_entry_free); + parray_free(database_map); + } - /* Compute summary of size of regular files in the backup */ - for (i = 0; i < parray_num(backup_files_list); i++) + /* Print the list of files to backup catalog */ + write_backup_filelist(¤t, backup_files_list, instance_config.pgdata, + external_dirs, true); + /* update backup control file to update size info */ + write_backup(¤t, true); + + /* Sync all copied files unless '--no-sync' flag is used */ + if (no_sync) + elog(WARNING, "Backup files are not synced to disk"); + else { - pgFile *file = (pgFile *) parray_get(backup_files_list, i); + elog(INFO, "Syncing backup files to disk"); + time(&start_time); - if (S_ISDIR(file->mode)) - current.data_bytes += 4096; + for (i = 0; i < parray_num(backup_files_list); i++) + { + char to_fullpath[MAXPGPATH]; + pgFile *file = (pgFile *) parray_get(backup_files_list, i); + + /* TODO: sync directory ? */ + if (S_ISDIR(file->mode)) + continue; + + if (file->write_size <= 0) + continue; + + /* construct fullpath */ + if (file->external_dir_num == 0) + join_path_components(to_fullpath, database_path, file->rel_path); + else + { + char external_dst[MAXPGPATH]; + + makeExternalDirPathByNum(external_dst, external_prefix, + file->external_dir_num); + join_path_components(to_fullpath, external_dst, file->rel_path); + } + + if (fio_sync(to_fullpath, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Cannot sync file \"%s\": %s", to_fullpath, strerror(errno)); + } - /* Count the amount of the data actually copied */ - if (S_ISREG(file->mode)) - current.data_bytes += file->write_size; + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + elog(INFO, "Backup files are synced, time elapsed: %s", pretty_time); + } + + /* be paranoid about instance been from the past */ + if (current.backup_mode != BACKUP_MODE_FULL && + current.stop_lsn < prev_backup->stop_lsn) + elog(ERROR, "Current backup STOP LSN %X/%X is lower than STOP LSN %X/%X of previous backup %s. " + "It may indicate that we are trying to backup PostgreSQL instance from the past.", + (uint32) (current.stop_lsn >> 32), (uint32) (current.stop_lsn), + (uint32) (prev_backup->stop_lsn >> 32), (uint32) (prev_backup->stop_lsn), + base36enc(prev_backup->stop_lsn)); + + /* clean external directories list */ + if (external_dirs) + free_dir_list(external_dirs); + + /* Cleanup */ + if (backup_list) + { + parray_walk(backup_list, pgBackupFree); + parray_free(backup_list); + } + + if (tli_list) + { + parray_walk(tli_list, timelineInfoFree); + parray_free(tli_list); } parray_walk(backup_files_list, pgFileFree); @@ -808,140 +746,228 @@ do_backup_instance(void) backup_files_list = NULL; } +/* + * Common code for CHECKDB and BACKUP commands. + * Ensure that we're able to connect to the instance + * check compatibility and fill basic info. + * For checkdb launched in amcheck mode with pgdata validation + * do not check system ID, it gives user an opportunity to + * check remote PostgreSQL instance. + * Also checking system ID in this case serves no purpose, because + * all work is done by server. 
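+ *
+ * Typical usage (as in do_backup below):
+ *
+ *     PGNodeInfo nodeInfo;
+ *     pgNodeInit(&nodeInfo);
+ *     conn = pgdata_basic_setup(instance_config.conn_opt, &nodeInfo);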
+ * + * Returns established connection + */ +PGconn * +pgdata_basic_setup(ConnectionOptions conn_opt, PGNodeInfo *nodeInfo) +{ + PGconn *cur_conn; + + /* Create connection for PostgreSQL */ + cur_conn = pgut_connect(conn_opt.pghost, conn_opt.pgport, + conn_opt.pgdatabase, + conn_opt.pguser); + + current.primary_conninfo = pgut_get_conninfo_string(cur_conn); + + /* Confirm data block size and xlog block size are compatible */ + confirm_block_size(cur_conn, "block_size", BLCKSZ); + confirm_block_size(cur_conn, "wal_block_size", XLOG_BLCKSZ); + nodeInfo->block_size = BLCKSZ; + nodeInfo->wal_block_size = XLOG_BLCKSZ; + nodeInfo->is_superuser = pg_is_superuser(cur_conn); + nodeInfo->pgpro_support = pgpro_support(cur_conn); + + current.from_replica = pg_is_in_recovery(cur_conn); + + /* Confirm that this server version is supported */ + check_server_version(cur_conn, nodeInfo); + + if (pg_checksum_enable(cur_conn)) + current.checksum_version = 1; + else + current.checksum_version = 0; + + nodeInfo->checksum_version = current.checksum_version; + + if (current.checksum_version) + elog(LOG, "This PostgreSQL instance was initialized with data block checksums. " + "Data block corruption will be detected"); + else + elog(WARNING, "This PostgreSQL instance was initialized without data block checksums. " + "pg_probackup have no way to detect data block corruption without them. " + "Reinitialize PGDATA with option '--data-checksums'."); + + if (nodeInfo->is_superuser) + elog(WARNING, "Current PostgreSQL role is superuser. " + "It is not recommended to run backup or checkdb as superuser."); + + StrNCpy(current.server_version, nodeInfo->server_version_str, + sizeof(current.server_version)); + + return cur_conn; +} + /* * Entry point of pg_probackup BACKUP subcommand. */ int -do_backup(time_t start_time) +do_backup(time_t start_time, pgSetBackupParams *set_backup_params, + bool no_validate, bool no_sync, bool backup_logs) { + PGconn *backup_conn = NULL; + PGNodeInfo nodeInfo; + char pretty_bytes[20]; - /* PGDATA and BACKUP_MODE are always required */ - if (pgdata == NULL) + /* Initialize PGInfonode */ + pgNodeInit(&nodeInfo); + + if (!instance_config.pgdata) elog(ERROR, "required parameter not specified: PGDATA " "(-D, --pgdata)"); - if (current.backup_mode == BACKUP_MODE_INVALID) - elog(ERROR, "required parameter not specified: BACKUP_MODE " - "(-b, --backup-mode)"); - /* Create connection for PostgreSQL */ - backup_conn = pgut_connect(pgut_dbname); - pgut_atexit_push(backup_disconnect, NULL); + /* Update backup status and other metainfo. */ + current.status = BACKUP_STATUS_RUNNING; + current.start_time = start_time; + + StrNCpy(current.program_version, PROGRAM_VERSION, + sizeof(current.program_version)); - current.primary_conninfo = pgut_get_conninfo_string(backup_conn); + current.compress_alg = instance_config.compress_alg; + current.compress_level = instance_config.compress_level; + + /* Save list of external directories */ + if (instance_config.external_dir_str && + (pg_strcasecmp(instance_config.external_dir_str, "none") != 0)) + current.external_dir_str = instance_config.external_dir_str; + + elog(INFO, "Backup start, pg_probackup version: %s, instance: %s, backup ID: %s, backup mode: %s, " + "wal mode: %s, remote: %s, compress-algorithm: %s, compress-level: %i", + PROGRAM_VERSION, instance_name, base36enc(start_time), pgBackupGetBackupMode(¤t), + current.stream ? "STREAM" : "ARCHIVE", IsSshProtocol() ? 
"true" : "false", + deparse_compress_alg(current.compress_alg), current.compress_level); + + /* Create backup directory and BACKUP_CONTROL_FILE */ + if (pgBackupCreateDir(¤t)) + elog(ERROR, "Cannot create backup directory"); + if (!lock_backup(¤t, true)) + elog(ERROR, "Cannot lock backup %s directory", + base36enc(current.start_time)); + write_backup(¤t, true); - current.compress_alg = compress_alg; - current.compress_level = compress_level; + /* set the error processing function for the backup process */ + pgut_atexit_push(backup_cleanup, NULL); - /* Confirm data block size and xlog block size are compatible */ - confirm_block_size("block_size", BLCKSZ); - confirm_block_size("wal_block_size", XLOG_BLCKSZ); + elog(LOG, "Backup destination is initialized"); - current.from_replica = pg_is_in_recovery(); + /* + * setup backup_conn, do some compatibility checks and + * fill basic info about instance + */ + backup_conn = pgdata_basic_setup(instance_config.conn_opt, &nodeInfo); - /* Confirm that this server version is supported */ - check_server_version(); + if (current.from_replica) + elog(INFO, "Backup %s is going to be taken from standby", base36enc(start_time)); - /* TODO fix it for remote backup*/ - if (!is_remote_backup) - current.checksum_version = get_data_checksum_version(true); + /* TODO, print PostgreSQL full version */ + //elog(INFO, "PostgreSQL version: %s", nodeInfo.server_version_str); - is_checksum_enabled = pg_checksum_enable(); + /* + * Ensure that backup directory was initialized for the same PostgreSQL + * instance we opened connection to. And that target backup database PGDATA + * belogns to the same instance. + */ + check_system_identifiers(backup_conn, instance_config.pgdata); - if (is_checksum_enabled) - elog(LOG, "This PostgreSQL instance was initialized with data block checksums. " - "Data block corruption will be detected"); - else - elog(WARNING, "This PostgreSQL instance was initialized without data block checksums. " - "pg_probackup have no way to detect data block corruption without them. 
" - "Reinitialize PGDATA with option '--data-checksums'."); + /* below perform checks specific for backup command */ +#if PG_VERSION_NUM >= 110000 + if (!RetrieveWalSegSize(backup_conn)) + elog(ERROR, "Failed to retrieve wal_segment_size"); +#endif - StrNCpy(current.server_version, server_version_str, - sizeof(current.server_version)); - current.stream = stream_wal; + get_ptrack_version(backup_conn, &nodeInfo); + // elog(WARNING, "ptrack_version_num %d", ptrack_version_num); - is_ptrack_support = pg_ptrack_support(); - if (is_ptrack_support) - { - is_ptrack_enable = pg_ptrack_enable(); - } + if (nodeInfo.ptrack_version_num > 0) + nodeInfo.is_ptrack_enable = pg_ptrack_enable(backup_conn, nodeInfo.ptrack_version_num); if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK) { - if (!is_ptrack_support) + if (nodeInfo.ptrack_version_num == 0) elog(ERROR, "This PostgreSQL instance does not support ptrack"); else { - if(!is_ptrack_enable) + if (!nodeInfo.is_ptrack_enable) elog(ERROR, "Ptrack is disabled"); } } - if (current.from_replica) - { + if (current.from_replica && exclusive_backup) /* Check master connection options */ - if (master_host == NULL) + if (instance_config.master_conn_opt.pghost == NULL) elog(ERROR, "Options for connection to master must be provided to perform backup from replica"); - /* Create connection to master server */ - master_conn = pgut_connect_extended(master_host, master_port, master_db, master_user); - } - - /* Get exclusive lock of backup catalog */ - catalog_lock(); - - /* - * Ensure that backup directory was initialized for the same PostgreSQL - * instance we opened connection to. And that target backup database PGDATA - * belogns to the same instance. - */ - /* TODO fix it for remote backup */ - if (!is_remote_backup) - check_system_identifiers(); - - - /* Start backup. Update backup status. */ - current.status = BACKUP_STATUS_RUNNING; - current.start_time = start_time; - - /* Create backup directory and BACKUP_CONTROL_FILE */ - if (pgBackupCreateDir(¤t)) - elog(ERROR, "cannot create backup directory"); - pgBackupWriteBackupControlFile(¤t); - - elog(LOG, "Backup destination is initialized"); - - /* set the error processing function for the backup process */ - pgut_atexit_push(backup_cleanup, NULL); + /* add note to backup if requested */ + if (set_backup_params && set_backup_params->note) + add_note(¤t, set_backup_params->note); /* backup data */ - do_backup_instance(); + do_backup_instance(backup_conn, &nodeInfo, no_sync, backup_logs); pgut_atexit_pop(backup_cleanup, NULL); /* compute size of wal files of this backup stored in the archive */ if (!current.stream) { - current.wal_bytes = XLOG_SEG_SIZE * - (current.stop_lsn/XLogSegSize - current.start_lsn/XLogSegSize + 1); + XLogSegNo start_segno; + XLogSegNo stop_segno; + + GetXLogSegNo(current.start_lsn, start_segno, instance_config.xlog_seg_size); + GetXLogSegNo(current.stop_lsn, stop_segno, instance_config.xlog_seg_size); + current.wal_bytes = (stop_segno - start_segno) * instance_config.xlog_seg_size; + + /* + * If start_lsn and stop_lsn are located in the same segment, then + * set wal_bytes to the size of 1 segment. + */ + if (current.wal_bytes <= 0) + current.wal_bytes = instance_config.xlog_seg_size; } /* Backup is done. 
Update backup status */ current.end_time = time(NULL); current.status = BACKUP_STATUS_DONE; - pgBackupWriteBackupControlFile(¤t); + write_backup(¤t, true); + + /* Pin backup if requested */ + if (set_backup_params && + (set_backup_params->ttl > 0 || + set_backup_params->expire_time > 0)) + { + pin_backup(¤t, set_backup_params); + } - //elog(LOG, "Backup completed. Total bytes : " INT64_FORMAT "", - // current.data_bytes); + if (!no_validate) + pgBackupValidate(¤t, NULL); - pgBackupValidate(¤t); + /* Notify user about backup size */ + if (current.stream) + pretty_size(current.data_bytes + current.wal_bytes, pretty_bytes, lengthof(pretty_bytes)); + else + pretty_size(current.data_bytes, pretty_bytes, lengthof(pretty_bytes)); + elog(INFO, "Backup %s resident size: %s", base36enc(current.start_time), pretty_bytes); - elog(INFO, "Backup %s completed", base36enc(current.start_time)); + if (current.status == BACKUP_STATUS_OK || + current.status == BACKUP_STATUS_DONE) + elog(INFO, "Backup %s completed", base36enc(current.start_time)); + else + elog(ERROR, "Backup %s failed", base36enc(current.start_time)); /* - * After successfil backup completion remove backups + * After successful backup completion remove backups * which are expired according to retention policies */ - if (delete_expired || delete_wal) - do_retention_purge(); + if (delete_expired || merge_expired || delete_wal) + do_retention(); return 0; } @@ -950,70 +976,76 @@ do_backup(time_t start_time) * Confirm that this server version is supported */ static void -check_server_version(void) +check_server_version(PGconn *conn, PGNodeInfo *nodeInfo) { - PGresult *res; + PGresult *res = NULL; /* confirm server version */ - server_version = PQserverVersion(backup_conn); + nodeInfo->server_version = PQserverVersion(conn); - if (server_version == 0) - elog(ERROR, "Unknown server version %d", server_version); + if (nodeInfo->server_version == 0) + elog(ERROR, "Unknown server version %d", nodeInfo->server_version); - if (server_version < 100000) - sprintf(server_version_str, "%d.%d", - server_version / 10000, - (server_version / 100) % 100); + if (nodeInfo->server_version < 100000) + sprintf(nodeInfo->server_version_str, "%d.%d", + nodeInfo->server_version / 10000, + (nodeInfo->server_version / 100) % 100); else - sprintf(server_version_str, "%d", - server_version / 10000); + sprintf(nodeInfo->server_version_str, "%d", + nodeInfo->server_version / 10000); - if (server_version < 90500) + if (nodeInfo->server_version < 90500) elog(ERROR, "server version is %s, must be %s or higher", - server_version_str, "9.5"); + nodeInfo->server_version_str, "9.5"); - if (current.from_replica && server_version < 90600) + if (current.from_replica && nodeInfo->server_version < 90600) elog(ERROR, "server version is %s, must be %s or higher for backup from replica", - server_version_str, "9.6"); + nodeInfo->server_version_str, "9.6"); - res = pgut_execute_extended(backup_conn, "SELECT pgpro_edition()", - 0, NULL, true, true); + if (nodeInfo->pgpro_support) + res = pgut_execute(conn, "SELECT pgpro_edition()", 0, NULL); /* * Check major version of connected PostgreSQL and major version of * compiled PostgreSQL. 
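+	 * For example, PQserverVersion() returns 90624 for 9.6.24 and 120001
+	 * for 12.1; the formatting above turns these into "9.6" and "12",
+	 * the same form PG_MAJORVERSION has in the matching build.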
*/ #ifdef PGPRO_VERSION - if (PQresultStatus(res) == PGRES_FATAL_ERROR) + if (!res) /* It seems we connected to PostgreSQL (not Postgres Pro) */ elog(ERROR, "%s was built with Postgres Pro %s %s, " "but connection is made with PostgreSQL %s", - PROGRAM_NAME, PG_MAJORVERSION, PGPRO_EDITION, server_version_str); - else if (strcmp(server_version_str, PG_MAJORVERSION) != 0 && - strcmp(PQgetvalue(res, 0, 0), PGPRO_EDITION) != 0) - elog(ERROR, "%s was built with Postgres Pro %s %s, " - "but connection is made with Postgres Pro %s %s", - PROGRAM_NAME, PG_MAJORVERSION, PGPRO_EDITION, - server_version_str, PQgetvalue(res, 0, 0)); + PROGRAM_NAME, PG_MAJORVERSION, PGPRO_EDITION, nodeInfo->server_version_str); + else + { + if (strcmp(nodeInfo->server_version_str, PG_MAJORVERSION) != 0 && + strcmp(PQgetvalue(res, 0, 0), PGPRO_EDITION) != 0) + elog(ERROR, "%s was built with Postgres Pro %s %s, " + "but connection is made with Postgres Pro %s %s", + PROGRAM_NAME, PG_MAJORVERSION, PGPRO_EDITION, + nodeInfo->server_version_str, PQgetvalue(res, 0, 0)); + } #else - if (PQresultStatus(res) != PGRES_FATAL_ERROR) + if (res) /* It seems we connected to Postgres Pro (not PostgreSQL) */ elog(ERROR, "%s was built with PostgreSQL %s, " "but connection is made with Postgres Pro %s %s", PROGRAM_NAME, PG_MAJORVERSION, - server_version_str, PQgetvalue(res, 0, 0)); - else if (strcmp(server_version_str, PG_MAJORVERSION) != 0) - elog(ERROR, "%s was built with PostgreSQL %s, but connection is made with %s", - PROGRAM_NAME, PG_MAJORVERSION, server_version_str); + nodeInfo->server_version_str, PQgetvalue(res, 0, 0)); + else + { + if (strcmp(nodeInfo->server_version_str, PG_MAJORVERSION) != 0) + elog(ERROR, "%s was built with PostgreSQL %s, but connection is made with %s", + PROGRAM_NAME, PG_MAJORVERSION, nodeInfo->server_version_str); + } #endif - PQclear(res); + if (res) + PQclear(res); /* Do exclusive backup only for PostgreSQL 9.5 */ - exclusive_backup = server_version < 90600 || - current.backup_mode == BACKUP_MODE_DIFF_PTRACK; + exclusive_backup = nodeInfo->server_version < 90600; } /* @@ -1022,21 +1054,36 @@ check_server_version(void) * belogns to the same instance. * All system identifiers must be equal. 
*/ -static void -check_system_identifiers(void) +void +check_system_identifiers(PGconn *conn, char *pgdata) { uint64 system_id_conn; uint64 system_id_pgdata; system_id_pgdata = get_system_identifier(pgdata); - system_id_conn = get_remote_system_identifier(backup_conn); - - if (system_id_conn != system_identifier) - elog(ERROR, "Backup data directory was initialized for system id %ld, but connected instance system id is %ld", - system_identifier, system_id_conn); - if (system_id_pgdata != system_identifier) - elog(ERROR, "Backup data directory was initialized for system id %ld, but target backup directory system id is %ld", - system_identifier, system_id_pgdata); + system_id_conn = get_remote_system_identifier(conn); + + /* for checkdb check only system_id_pgdata and system_id_conn */ + if (current.backup_mode == BACKUP_MODE_INVALID) + { + if (system_id_conn != system_id_pgdata) + { + elog(ERROR, "Data directory initialized with system id " UINT64_FORMAT ", " + "but connected instance system id is " UINT64_FORMAT, + system_id_pgdata, system_id_conn); + } + return; + } + + if (system_id_conn != instance_config.system_identifier) + elog(ERROR, "Backup data directory was initialized for system id " UINT64_FORMAT ", " + "but connected instance system id is " UINT64_FORMAT, + instance_config.system_identifier, system_id_conn); + + if (system_id_pgdata != instance_config.system_identifier) + elog(ERROR, "Backup data directory was initialized for system id " UINT64_FORMAT ", " + "but target backup directory system id is " UINT64_FORMAT, + instance_config.system_identifier, system_id_pgdata); } /* @@ -1044,15 +1091,15 @@ check_system_identifiers(void) * compatible settings. Currently check BLCKSZ and XLOG_BLCKSZ. */ static void -confirm_block_size(const char *name, int blcksz) +confirm_block_size(PGconn *conn, const char *name, int blcksz) { PGresult *res; char *endp; int block_size; - res = pgut_execute(backup_conn, "SELECT pg_catalog.current_setting($1)", 1, &name); + res = pgut_execute(conn, "SELECT pg_catalog.current_setting($1)", 1, &name); if (PQntuples(res) != 1 || PQnfields(res) != 1) - elog(ERROR, "cannot get %s: %s", name, PQerrorMessage(backup_conn)); + elog(ERROR, "cannot get %s: %s", name, PQerrorMessage(conn)); block_size = strtol(PQgetvalue(res, 0, 0), &endp, 10); if ((endp && *endp) || block_size != blcksz) @@ -1067,19 +1114,16 @@ confirm_block_size(const char *name, int blcksz) * Notify start of backup to PostgreSQL server. */ static void -pg_start_backup(const char *label, bool smooth, pgBackup *backup) +pg_start_backup(const char *label, bool smooth, pgBackup *backup, + PGNodeInfo *nodeInfo, PGconn *conn) { PGresult *res; const char *params[2]; - uint32 xlogid; - uint32 xrecoff; - PGconn *conn; + uint32 lsn_hi; + uint32 lsn_lo; params[0] = label; - /* For replica we call pg_start_backup() on master */ - conn = (backup->from_replica) ? master_conn : backup_conn; - /* 2nd argument is 'fast'*/ params[1] = smooth ? "false" : "true"; if (!exclusive_backup) @@ -1098,43 +1142,31 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) * is necessary to call pg_stop_backup() in backup_cleanup(). 
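+	 * The backup_stopbackup_callback registered via pgut_atexit_push()
+	 * just below issues pg_stop_backup() if we exit while the backup is
+	 * still in progress.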
*/ backup_in_progress = true; + pgut_atexit_push(backup_stopbackup_callback, conn); /* Extract timeline and LSN from results of pg_start_backup() */ - XLogDataFromLSN(PQgetvalue(res, 0, 0), &xlogid, &xrecoff); + XLogDataFromLSN(PQgetvalue(res, 0, 0), &lsn_hi, &lsn_lo); /* Calculate LSN */ - backup->start_lsn = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; + backup->start_lsn = ((uint64) lsn_hi )<< 32 | lsn_lo; PQclear(res); - if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) + if ((!stream_wal || current.backup_mode == BACKUP_MODE_DIFF_PAGE) && + !backup->from_replica && + !(nodeInfo->server_version < 90600 && + !nodeInfo->is_superuser)) /* * Switch to a new WAL segment. It is necessary to get archived WAL * segment, which includes start LSN of current backup. + * Don`t do this for replica backups and for PG 9.5 if pguser is not superuser + * (because in 9.5 only superuser can switch WAL) */ pg_switch_wal(conn); - - if (!stream_wal) - { - /* - * Do not wait start_lsn for stream backup. - * Because WAL streaming will start after pg_start_backup() in stream - * mode. - */ - /* In PAGE mode wait for current segment... */ - if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) - wait_wal_lsn(backup->start_lsn, false); - /* ...for others wait for previous segment */ - else - wait_wal_lsn(backup->start_lsn, true); - } - - /* Wait for start_lsn to be replayed by replica */ - if (backup->from_replica) - wait_replica_wal_lsn(backup->start_lsn, true); } /* * Switch to a new WAL segment. It should be called only for master. + * For PG 9.5 it should be called only if pguser is superuser. */ static void pg_switch_wal(PGconn *conn) @@ -1145,96 +1177,123 @@ pg_switch_wal(PGconn *conn) res = pgut_execute(conn, "SET client_min_messages = warning;", 0, NULL); PQclear(res); - if (server_version >= 100000) - res = pgut_execute(conn, "SELECT * FROM pg_catalog.pg_switch_wal()", 0, NULL); - else - res = pgut_execute(conn, "SELECT * FROM pg_catalog.pg_switch_xlog()", 0, NULL); +#if PG_VERSION_NUM >= 100000 + res = pgut_execute(conn, "SELECT pg_catalog.pg_switch_wal()", 0, NULL); +#else + res = pgut_execute(conn, "SELECT pg_catalog.pg_switch_xlog()", 0, NULL); +#endif PQclear(res); } /* - * Check if the instance supports ptrack - * TODO Maybe we should rather check ptrack_version()? + * Check if the instance is PostgresPro fork. */ static bool -pg_ptrack_support(void) +pgpro_support(PGconn *conn) { - PGresult *res_db; + PGresult *res; - res_db = pgut_execute(backup_conn, - "SELECT proname FROM pg_proc WHERE proname='ptrack_version'", + res = pgut_execute(conn, + "SELECT proname FROM pg_proc WHERE proname='pgpro_edition'", 0, NULL); - if (PQntuples(res_db) == 0) + + if (PQresultStatus(res) == PGRES_TUPLES_OK && + (PQntuples(res) == 1) && + (strcmp(PQgetvalue(res, 0, 0), "pgpro_edition") == 0)) { - PQclear(res_db); - return false; + PQclear(res); + return true; } - PQclear(res_db); - res_db = pgut_execute(backup_conn, - "SELECT pg_catalog.ptrack_version()", - 0, NULL); - if (PQntuples(res_db) == 0) + PQclear(res); + return false; +} + +/* + * Fill 'datname to Oid' map + * + * This function can fail to get the map for legal reasons, e.g. missing + * permissions on pg_database during `backup`. + * As long as user do not use partial restore feature it`s fine. + * + * To avoid breaking a backward compatibility don't throw an ERROR, + * throw a warning instead of an error and return NULL. + * Caller is responsible for checking the result. 
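/*
 * Illustrative sketch (standalone, not from the pg_probackup sources): the
 * hi/lo arithmetic used above when the "XXXXXXXX/XXXXXXXX" text returned by
 * pg_start_backup()/pg_stop_backup() is turned into a 64-bit LSN, and split
 * back again for logging.  Types are simplified to plain stdint types; the
 * real code uses XLogRecPtr and XLogDataFromLSN().  The sample LSN value is
 * arbitrary.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    const char  *text_lsn = "2/F578A000";   /* sample value only */
    unsigned int lsn_hi;
    unsigned int lsn_lo;
    uint64_t     lsn;

    /* Same parsing XLogDataFromLSN() performs: two hex halves */
    if (sscanf(text_lsn, "%X/%X", &lsn_hi, &lsn_lo) != 2)
    {
        fprintf(stderr, "could not parse LSN \"%s\"\n", text_lsn);
        return 1;
    }

    /* Combine halves exactly as backup->start_lsn is computed above */
    lsn = ((uint64_t) lsn_hi) << 32 | lsn_lo;

    /* Split back for printing, the same %X/%X convention used in elog() */
    printf("LSN %X/%X = %llu\n",
           (unsigned int) (lsn >> 32), (unsigned int) lsn,
           (unsigned long long) lsn);
    return 0;
}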
+ */ +parray * +get_database_map(PGconn *conn) +{ + PGresult *res; + parray *database_map = NULL; + int i; + + /* + * Do not include template0 and template1 to the map + * as default databases that must always be restored. + */ + res = pgut_execute_extended(conn, + "SELECT oid, datname FROM pg_catalog.pg_database " + "WHERE datname NOT IN ('template1', 'template0')", + 0, NULL, true, true); + + /* Don't error out, simply return NULL. See comment above. */ + if (PQresultStatus(res) != PGRES_TUPLES_OK) { - PQclear(res_db); - return false; + PQclear(res); + elog(WARNING, "Failed to get database map: %s", + PQerrorMessage(conn)); + + return NULL; } - /* Now we support only ptrack versions upper than 1.5 */ - if (strcmp(PQgetvalue(res_db, 0, 0), "1.5") != 0 && - strcmp(PQgetvalue(res_db, 0, 0), "1.6") != 0) + /* Construct database map */ + for (i = 0; i < PQntuples(res); i++) { - elog(WARNING, "Update your ptrack to the version 1.5 or upper. Current version is %s", PQgetvalue(res_db, 0, 0)); - PQclear(res_db); - return false; - } + char *datname = NULL; + db_map_entry *db_entry = (db_map_entry *) pgut_malloc(sizeof(db_map_entry)); - PQclear(res_db); - return true; -} + /* get Oid */ + db_entry->dbOid = atoi(PQgetvalue(res, i, 0)); -/* Check if ptrack is enabled in target instance */ -static bool -pg_ptrack_enable(void) -{ - PGresult *res_db; + /* get datname */ + datname = PQgetvalue(res, i, 1); + db_entry->datname = pgut_malloc(strlen(datname) + 1); + strcpy(db_entry->datname, datname); - res_db = pgut_execute(backup_conn, "show ptrack_enable", 0, NULL); + if (database_map == NULL) + database_map = parray_new(); - if (strcmp(PQgetvalue(res_db, 0, 0), "on") != 0) - { - PQclear(res_db); - return false; + parray_append(database_map, db_entry); } - PQclear(res_db); - return true; + + return database_map; } /* Check if ptrack is enabled in target instance */ static bool -pg_checksum_enable(void) +pg_checksum_enable(PGconn *conn) { PGresult *res_db; - res_db = pgut_execute(backup_conn, "show data_checksums", 0, NULL); + res_db = pgut_execute(conn, "SHOW data_checksums", 0, NULL); - if (strcmp(PQgetvalue(res_db, 0, 0), "on") != 0) + if (strcmp(PQgetvalue(res_db, 0, 0), "on") == 0) { PQclear(res_db); - return false; + return true; } PQclear(res_db); - return true; + return false; } /* Check if target instance is replica */ static bool -pg_is_in_recovery(void) +pg_is_in_recovery(PGconn *conn) { PGresult *res_db; - res_db = pgut_execute(backup_conn, "SELECT pg_catalog.pg_is_in_recovery()", 0, NULL); + res_db = pgut_execute(conn, "SELECT pg_catalog.pg_is_in_recovery()", 0, NULL); if (PQgetvalue(res_db, 0, 0)[0] == 't') { @@ -1245,250 +1304,106 @@ pg_is_in_recovery(void) return false; } -/* Clear ptrack files in all databases of the instance we connected to */ -static void -pg_ptrack_clear(void) -{ - PGresult *res_db, - *res; - const char *dbname; - int i; - Oid dbOid, tblspcOid; - char *params[2]; - - params[0] = palloc(64); - params[1] = palloc(64); - res_db = pgut_execute(backup_conn, "SELECT datname, oid, dattablespace FROM pg_database", - 0, NULL); - - for(i = 0; i < PQntuples(res_db); i++) - { - PGconn *tmp_conn; - - dbname = PQgetvalue(res_db, i, 0); - if (strcmp(dbname, "template0") == 0) - continue; - - dbOid = atoi(PQgetvalue(res_db, i, 1)); - tblspcOid = atoi(PQgetvalue(res_db, i, 2)); - - tmp_conn = pgut_connect(dbname); - res = pgut_execute(tmp_conn, "SELECT pg_catalog.pg_ptrack_clear()", 0, NULL); - - sprintf(params[0], "%i", dbOid); - sprintf(params[1], "%i", tblspcOid); - res = 
pgut_execute(tmp_conn, "SELECT pg_catalog.pg_ptrack_get_and_clear_db($1, $2)", - 2, (const char **)params); - PQclear(res); - - pgut_disconnect(tmp_conn); - } - - pfree(params[0]); - pfree(params[1]); - PQclear(res_db); -} +/* Check if current PostgreSQL role is superuser */ static bool -pg_ptrack_get_and_clear_db(Oid dbOid, Oid tblspcOid) +pg_is_superuser(PGconn *conn) { - char *params[2]; - char *dbname; - PGresult *res_db; PGresult *res; - bool result; - - params[0] = palloc(64); - params[1] = palloc(64); - sprintf(params[0], "%i", dbOid); - res_db = pgut_execute(backup_conn, - "SELECT datname FROM pg_database WHERE oid=$1", - 1, (const char **) params); - /* - * If database is not found, it's not an error. - * It could have been deleted since previous backup. - */ - if (PQntuples(res_db) != 1 || PQnfields(res_db) != 1) - return false; + res = pgut_execute(conn, "SELECT pg_catalog.current_setting('is_superuser')", 0, NULL); - dbname = PQgetvalue(res_db, 0, 0); - - /* Always backup all files from template0 database */ - if (strcmp(dbname, "template0") == 0) + if (strcmp(PQgetvalue(res, 0, 0), "on") == 0) { - PQclear(res_db); + PQclear(res); return true; } - PQclear(res_db); - - sprintf(params[0], "%i", dbOid); - sprintf(params[1], "%i", tblspcOid); - res = pgut_execute(backup_conn, "SELECT pg_catalog.pg_ptrack_get_and_clear_db($1, $2)", - 2, (const char **)params); - - if (PQnfields(res) != 1) - elog(ERROR, "cannot perform pg_ptrack_get_and_clear_db()"); - - if (!parse_bool(PQgetvalue(res, 0, 0), &result)) - elog(ERROR, - "result of pg_ptrack_get_and_clear_db() is invalid: %s", - PQgetvalue(res, 0, 0)); - - PQclear(res); - pfree(params[0]); - pfree(params[1]); - - return result; -} - -/* Read and clear ptrack files of the target relation. - * Result is a bytea ptrack map of all segments of the target relation. - * case 1: we know a tablespace_oid, db_oid, and rel_filenode - * case 2: we know db_oid and rel_filenode (no tablespace_oid, because file in pg_default) - * case 3: we know only rel_filenode (because file in pg_global) - */ -static char * -pg_ptrack_get_and_clear(Oid tablespace_oid, Oid db_oid, Oid rel_filenode, - size_t *result_size) -{ - PGconn *tmp_conn; - PGresult *res_db, - *res; - char *params[2]; - char *result; - char *val; - - params[0] = palloc(64); - params[1] = palloc(64); - - /* regular file (not in directory 'global') */ - if (db_oid != 0) - { - char *dbname; - - sprintf(params[0], "%i", db_oid); - res_db = pgut_execute(backup_conn, - "SELECT datname FROM pg_database WHERE oid=$1", - 1, (const char **) params); - /* - * If database is not found, it's not an error. - * It could have been deleted since previous backup. - */ - if (PQntuples(res_db) != 1 || PQnfields(res_db) != 1) - return NULL; - - dbname = PQgetvalue(res_db, 0, 0); - - if (strcmp(dbname, "template0") == 0) - { - PQclear(res_db); - return NULL; - } - - tmp_conn = pgut_connect(dbname); - sprintf(params[0], "%i", tablespace_oid); - sprintf(params[1], "%i", rel_filenode); - res = pgut_execute(tmp_conn, "SELECT pg_catalog.pg_ptrack_get_and_clear($1, $2)", - 2, (const char **)params); - - if (PQnfields(res) != 1) - elog(ERROR, "cannot get ptrack file from database \"%s\" by tablespace oid %u and relation oid %u", - dbname, tablespace_oid, rel_filenode); - PQclear(res_db); - pgut_disconnect(tmp_conn); - } - /* file in directory 'global' */ - else - { - /* - * execute ptrack_get_and_clear for relation in pg_global - * Use backup_conn, cause we can do it from any database. 
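/*
 * Illustrative sketch (standalone, not from the pg_probackup sources): the
 * "probe with a one-row query" pattern that pg_checksum_enable(),
 * pg_is_superuser() and pg_is_in_recovery() above rely on, written as a
 * minimal libpq program.  The connection string is a placeholder; build
 * with something like:
 *   cc probe.c -I$(pg_config --includedir) -L$(pg_config --libdir) -lpq
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libpq-fe.h>

/* Run a single-value query and return a malloc'd copy of row 0, column 0 */
static char *
scalar_query(PGconn *conn, const char *query)
{
    PGresult   *res = PQexec(conn, query);
    char       *val;

    if (PQresultStatus(res) != PGRES_TUPLES_OK || PQntuples(res) < 1)
    {
        fprintf(stderr, "query \"%s\" failed: %s", query, PQerrorMessage(conn));
        PQclear(res);
        return NULL;
    }
    val = strdup(PQgetvalue(res, 0, 0));
    PQclear(res);
    return val;
}

int
main(void)
{
    PGconn *conn = PQconnectdb("dbname=postgres");      /* placeholder */
    char   *checksums;
    char   *in_recovery;

    if (PQstatus(conn) != CONNECTION_OK)
    {
        fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
        return 1;
    }

    checksums = scalar_query(conn, "SHOW data_checksums");
    in_recovery = scalar_query(conn, "SELECT pg_catalog.pg_is_in_recovery()");

    printf("data_checksums = %s, in recovery = %s\n",
           checksums ? checksums : "?", in_recovery ? in_recovery : "?");

    free(checksums);
    free(in_recovery);
    PQfinish(conn);
    return 0;
}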
- */ - sprintf(params[0], "%i", tablespace_oid); - sprintf(params[1], "%i", rel_filenode); - res = pgut_execute(backup_conn, "SELECT pg_catalog.pg_ptrack_get_and_clear($1, $2)", - 2, (const char **)params); - - if (PQnfields(res) != 1) - elog(ERROR, "cannot get ptrack file from pg_global tablespace and relation oid %u", - rel_filenode); - } - - val = PQgetvalue(res, 0, 0); - - /* TODO Now pg_ptrack_get_and_clear() returns bytea ending with \x. - * It should be fixed in future ptrack releases, but till then we - * can parse it. - */ - if (strcmp("x", val+1) == 0) - { - /* Ptrack file is missing */ - return NULL; - } - - result = (char *) PQunescapeBytea((unsigned char *) PQgetvalue(res, 0, 0), - result_size); PQclear(res); - pfree(params[0]); - pfree(params[1]); - - return result; + return false; } /* - * Wait for target 'lsn'. + * Wait for target LSN or WAL segment, containing target LSN. * - * If current backup started in archive mode wait for 'lsn' to be archived in - * archive 'wal' directory with WAL segment file. - * If current backup started in stream mode wait for 'lsn' to be streamed in - * 'pg_wal' directory. + * Depending on value of flag in_stream_dir wait for target LSN to archived or + * streamed in 'archive_dir' or 'pg_wal' directory. * - * If 'wait_prev_segment' wait for previous segment. + * If flag 'is_start_lsn' is set then issue warning for first-time users. + * If flag 'in_prev_segment' is set, look for LSN in previous segment, + * with EndRecPtr >= Target LSN. It should be used only for solving + * invalid XRecOff problem. + * If flag 'segment_only' is set, then, instead of waiting for LSN, wait for segment, + * containing that LSN. + * If flags 'in_prev_segment' and 'segment_only' are both set, then wait for + * previous segment. + * + * Flag 'in_stream_dir' determine whether we looking for WAL in 'pg_wal' directory or + * in archive. Do note, that we cannot rely sorely on global variable 'stream_wal' because, + * for example, PAGE backup must(!) look for start_lsn in archive regardless of wal_mode. + * + * 'timeout_elevel' determine the elevel for timeout elog message. If elevel lighter than + * ERROR is used, then return InvalidXLogRecPtr. TODO: return something more concrete, for example 1. + * + * Returns target LSN if such is found, failing that returns LSN of record prior to target LSN. + * Returns InvalidXLogRecPtr if 'segment_only' flag is used. */ -static void -wait_wal_lsn(XLogRecPtr lsn, bool wait_prev_segment) +static XLogRecPtr +wait_wal_lsn(XLogRecPtr target_lsn, bool is_start_lsn, TimeLineID tli, + bool in_prev_segment, bool segment_only, + int timeout_elevel, bool in_stream_dir) { - TimeLineID tli; XLogSegNo targetSegNo; - char wal_dir[MAXPGPATH], - wal_segment_path[MAXPGPATH]; - char wal_segment[MAXFNAMELEN]; + char pg_wal_dir[MAXPGPATH]; + char wal_segment_path[MAXPGPATH], + *wal_segment_dir, + wal_segment[MAXFNAMELEN]; bool file_exists = false; uint32 try_count = 0, timeout; + char *wal_delivery_str = in_stream_dir ? 
"streamed":"archived"; #ifdef HAVE_LIBZ char gz_wal_segment_path[MAXPGPATH]; #endif - tli = get_current_timeline(false); - - /* Compute the name of the WAL file containig requested LSN */ - XLByteToSeg(lsn, targetSegNo); - if (wait_prev_segment) + /* Compute the name of the WAL file containing requested LSN */ + GetXLogSegNo(target_lsn, targetSegNo, instance_config.xlog_seg_size); + if (in_prev_segment) targetSegNo--; - XLogFileName(wal_segment, tli, targetSegNo); + GetXLogFileName(wal_segment, tli, targetSegNo, + instance_config.xlog_seg_size); - if (stream_wal) + /* + * In pg_start_backup we wait for 'target_lsn' in 'pg_wal' directory if it is + * stream and non-page backup. Page backup needs archived WAL files, so we + * wait for 'target_lsn' in archive 'wal' directory for page backups. + * + * In pg_stop_backup it depends only on stream_wal. + */ + if (in_stream_dir) { - pgBackupGetPath2(¤t, wal_dir, lengthof(wal_dir), + pgBackupGetPath2(¤t, pg_wal_dir, lengthof(pg_wal_dir), DATABASE_DIR, PG_XLOG_DIR); - join_path_components(wal_segment_path, wal_dir, wal_segment); - - timeout = (uint32) checkpoint_timeout(); - timeout = timeout + timeout * 0.1; + join_path_components(wal_segment_path, pg_wal_dir, wal_segment); + wal_segment_dir = pg_wal_dir; } else { join_path_components(wal_segment_path, arclog_path, wal_segment); - timeout = archive_timeout; + wal_segment_dir = arclog_path; } - if (wait_prev_segment) + /* TODO: remove this in 3.0 (it is a cludge against some old bug with archive_timeout) */ + if (instance_config.archive_timeout > 0) + timeout = instance_config.archive_timeout; + else + timeout = ARCHIVE_TIMEOUT_DEFAULT; + + if (segment_only) elog(LOG, "Looking for segment: %s", wal_segment); else - elog(LOG, "Looking for LSN: %X/%X in segment: %s", (uint32) (lsn >> 32), (uint32) lsn, wal_segment); + elog(LOG, "Looking for LSN %X/%X in segment: %s", + (uint32) (target_lsn >> 32), (uint32) target_lsn, wal_segment); #ifdef HAVE_LIBZ snprintf(gz_wal_segment_path, sizeof(gz_wal_segment_path), "%s.gz", @@ -1500,13 +1415,13 @@ wait_wal_lsn(XLogRecPtr lsn, bool wait_prev_segment) { if (!file_exists) { - file_exists = fileExists(wal_segment_path); + file_exists = fileExists(wal_segment_path, FIO_BACKUP_HOST); /* Try to find compressed WAL file */ if (!file_exists) { #ifdef HAVE_LIBZ - file_exists = fileExists(gz_wal_segment_path); + file_exists = fileExists(gz_wal_segment_path, FIO_BACKUP_HOST); if (file_exists) elog(LOG, "Found compressed WAL segment: %s", wal_segment_path); #endif @@ -1517,19 +1432,51 @@ wait_wal_lsn(XLogRecPtr lsn, bool wait_prev_segment) if (file_exists) { - /* Do not check LSN for previous WAL segment */ - if (wait_prev_segment) - return; + /* Do not check for target LSN */ + if (segment_only) + return InvalidXLogRecPtr; /* - * A WAL segment found. Check LSN on it. + * A WAL segment found. Look for target LSN in it. */ - if ((stream_wal && wal_contains_lsn(wal_dir, lsn, tli)) || - (!stream_wal && wal_contains_lsn(arclog_path, lsn, tli))) + if (!XRecOffIsNull(target_lsn) && + wal_contains_lsn(wal_segment_dir, target_lsn, tli, + instance_config.xlog_seg_size)) /* Target LSN was found */ { - elog(LOG, "Found LSN: %X/%X", (uint32) (lsn >> 32), (uint32) lsn); - return; + elog(LOG, "Found LSN: %X/%X", (uint32) (target_lsn >> 32), (uint32) target_lsn); + return target_lsn; + } + + /* + * If we failed to get target LSN in a reasonable time, try + * to get LSN of last valid record prior to the target LSN. But only + * in case of a backup from a replica. 
+ * Note, that with NullXRecOff target_lsn we do not wait + * for 'timeout / 2' seconds before going for previous record, + * because such LSN cannot be delivered at all. + * + * There are two cases for this: + * 1. Replica returned readpoint LSN which just do not exists. We want to look + * for previous record in the same(!) WAL segment which endpoint points to this LSN. + * 2. Replica returened endpoint LSN with NullXRecOff. We want to look + * for previous record which endpoint points greater or equal LSN in previous WAL segment. + */ + if (current.from_replica && + (XRecOffIsNull(target_lsn) || try_count > timeout / 2)) + { + XLogRecPtr res; + + res = get_prior_record_lsn(wal_segment_dir, current.start_lsn, target_lsn, tli, + in_prev_segment, instance_config.xlog_seg_size); + + if (!XLogRecPtrIsInvalid(res)) + { + /* LSN of the prior record was found */ + elog(LOG, "Found prior LSN: %X/%X", + (uint32) (res >> 32), (uint32) res); + return res; + } } } @@ -1541,96 +1488,35 @@ wait_wal_lsn(XLogRecPtr lsn, bool wait_prev_segment) /* Inform user if WAL segment is absent in first attempt */ if (try_count == 1) { - if (wait_prev_segment) - elog(INFO, "Wait for WAL segment %s to be archived", - wal_segment_path); - else - elog(INFO, "Wait for LSN %X/%X in archived WAL segment %s", - (uint32) (lsn >> 32), (uint32) lsn, wal_segment_path); - } - - if (timeout > 0 && try_count > timeout) - { - if (file_exists) - elog(ERROR, "WAL segment %s was archived, " - "but target LSN %X/%X could not be archived in %d seconds", - wal_segment, (uint32) (lsn >> 32), (uint32) lsn, timeout); - /* If WAL segment doesn't exist or we wait for previous segment */ - else - elog(ERROR, - "Switched WAL segment %s could not be archived in %d seconds", - wal_segment, timeout); - } - } -} - -/* - * Wait for target 'lsn' on replica instance from master. - */ -static void -wait_replica_wal_lsn(XLogRecPtr lsn, bool is_start_backup) -{ - uint32 try_count = 0; - - while (true) - { - PGresult *res; - uint32 xlogid; - uint32 xrecoff; - XLogRecPtr replica_lsn; - - /* - * For lsn from pg_start_backup() we need it to be replayed on replica's - * data. - */ - if (is_start_backup) - { - if (server_version >= 100000) - res = pgut_execute(backup_conn, "SELECT pg_catalog.pg_last_wal_replay_lsn()", - 0, NULL); - else - res = pgut_execute(backup_conn, "SELECT pg_catalog.pg_last_xlog_replay_location()", - 0, NULL); - } - /* - * For lsn from pg_stop_backup() we need it only to be received by - * replica and fsync()'ed on WAL segment. - */ - else - { - if (server_version >= 100000) - res = pgut_execute(backup_conn, "SELECT pg_catalog.pg_last_wal_receive_lsn()", - 0, NULL); + if (segment_only) + elog(INFO, "Wait for WAL segment %s to be %s", + wal_segment_path, wal_delivery_str); else - res = pgut_execute(backup_conn, "SELECT pg_catalog.pg_last_xlog_receive_location()", - 0, NULL); - } - - /* Extract timeline and LSN from result */ - XLogDataFromLSN(PQgetvalue(res, 0, 0), &xlogid, &xrecoff); - /* Calculate LSN */ - replica_lsn = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; - PQclear(res); + elog(INFO, "Wait for LSN %X/%X in %s WAL segment %s", + (uint32) (target_lsn >> 32), (uint32) target_lsn, + wal_delivery_str, wal_segment_path); + } - /* target lsn was replicated */ - if (replica_lsn >= lsn) - break; + if (!stream_wal && is_start_lsn && try_count == 30) + elog(WARNING, "By default pg_probackup assume WAL delivery method to be ARCHIVE. " + "If continuous archiving is not set up, use '--stream' option to make autonomous backup. 
" + "Otherwise check that continuous archiving works correctly."); - sleep(1); - if (interrupted) - elog(ERROR, "Interrupted during waiting for target LSN"); - try_count++; + if (timeout > 0 && try_count > timeout) + { + if (file_exists) + elog(timeout_elevel, "WAL segment %s was %s, " + "but target LSN %X/%X could not be archived in %d seconds", + wal_segment, wal_delivery_str, + (uint32) (target_lsn >> 32), (uint32) target_lsn, timeout); + /* If WAL segment doesn't exist or we wait for previous segment */ + else + elog(timeout_elevel, + "WAL segment %s could not be %s in %d seconds", + wal_segment, wal_delivery_str, timeout); - /* Inform user if target lsn is absent in first attempt */ - if (try_count == 1) - elog(INFO, "Wait for target LSN %X/%X to be received by replica", - (uint32) (lsn >> 32), (uint32) lsn); - - if (replica_timeout > 0 && try_count > replica_timeout) - elog(ERROR, "Target LSN %X/%X could not be recevied by replica " - "in %d seconds", - (uint32) (lsn >> 32), (uint32) lsn, - replica_timeout); + return InvalidXLogRecPtr; + } } } @@ -1638,15 +1524,16 @@ wait_replica_wal_lsn(XLogRecPtr lsn, bool is_start_backup) * Notify end of backup to PostgreSQL server. */ static void -pg_stop_backup(pgBackup *backup) +pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, + PGNodeInfo *nodeInfo) { PGconn *conn; PGresult *res; PGresult *tablespace_map_content = NULL; - uint32 xlogid; - uint32 xrecoff; - XLogRecPtr restore_lsn = InvalidXLogRecPtr; - int pg_stop_backup_timeout = 0; + uint32 lsn_hi; + uint32 lsn_lo; + //XLogRecPtr restore_lsn = InvalidXLogRecPtr; + int pg_stop_backup_timeout = 0; char path[MAXPGPATH]; char backup_label[MAXPGPATH]; FILE *fp; @@ -1654,6 +1541,8 @@ pg_stop_backup(pgBackup *backup) size_t len; char *val = NULL; char *stop_backup_query = NULL; + bool stop_lsn_exists = false; + XLogRecPtr stop_backup_lsn_tmp = InvalidXLogRecPtr; /* * We will use this values if there are no transactions between start_lsn @@ -1663,28 +1552,32 @@ pg_stop_backup(pgBackup *backup) TransactionId recovery_xid; if (!backup_in_progress) - elog(FATAL, "backup is not in progress"); + elog(ERROR, "backup is not in progress"); - /* For replica we call pg_stop_backup() on master */ - conn = (current.from_replica) ? master_conn : backup_conn; + conn = pg_startbackup_conn; /* Remove annoying NOTICE messages generated by backend */ res = pgut_execute(conn, "SET client_min_messages = warning;", 0, NULL); PQclear(res); - /* Create restore point */ - if (backup != NULL) + /* Make proper timestamp format for parse_time() */ + res = pgut_execute(conn, "SET datestyle = 'ISO, DMY';", 0, NULL); + PQclear(res); + + /* Create restore point + * Only if backup is from master. + * For PG 9.5 create restore point only if pguser is superuser. + */ + if (backup != NULL && !backup->from_replica && + !(nodeInfo->server_version < 90600 && + !nodeInfo->is_superuser)) { const char *params[1]; char name[1024]; - if (!current.from_replica) - snprintf(name, lengthof(name), "pg_probackup, backup_id %s", - base36enc(backup->start_time)); - else - snprintf(name, lengthof(name), "pg_probackup, backup_id %s. Replica Backup", - base36enc(backup->start_time)); + snprintf(name, lengthof(name), "pg_probackup, backup_id %s", + base36enc(backup->start_time)); params[0] = name; res = pgut_execute(conn, "SELECT pg_catalog.pg_create_restore_point($1)", @@ -1709,19 +1602,43 @@ pg_stop_backup(pgBackup *backup) * Stop the non-exclusive backup. 
Besides stop_lsn it returns from * pg_stop_backup(false) copy of the backup label and tablespace map * so they can be written to disk by the caller. + * In case of backup from replica >= 9.6 we do not trust minRecPoint + * and stop_backup LSN, so we use latest replayed LSN as STOP LSN. */ - stop_backup_query = "SELECT" - " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," - " current_timestamp(0)::timestamptz," - " lsn," - " labelfile," - " spcmapfile" - " FROM pg_catalog.pg_stop_backup(false)"; + + /* current is used here because of cleanup */ + if (current.from_replica) + stop_backup_query = "SELECT" + " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," + " current_timestamp(0)::timestamptz," +#if PG_VERSION_NUM >= 100000 + " pg_catalog.pg_last_wal_replay_lsn()," +#else + " pg_catalog.pg_last_xlog_replay_location()," +#endif + " labelfile," + " spcmapfile" +#if PG_VERSION_NUM >= 100000 + " FROM pg_catalog.pg_stop_backup(false, false)"; +#else + " FROM pg_catalog.pg_stop_backup(false)"; +#endif + else + stop_backup_query = "SELECT" + " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," + " current_timestamp(0)::timestamptz," + " lsn," + " labelfile," + " spcmapfile" +#if PG_VERSION_NUM >= 100000 + " FROM pg_catalog.pg_stop_backup(false, false)"; +#else + " FROM pg_catalog.pg_stop_backup(false)"; +#endif } else { - stop_backup_query = "SELECT" " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," " current_timestamp(0)::timestamptz," @@ -1734,15 +1651,24 @@ pg_stop_backup(pgBackup *backup) elog(ERROR, "Failed to send pg_stop_backup query"); } + /* After we have sent pg_stop_backup, we don't need this callback anymore */ + pgut_atexit_pop(backup_stopbackup_callback, pg_startbackup_conn); + /* - * Wait for the result of pg_stop_backup(), - * but no longer than PG_STOP_BACKUP_TIMEOUT seconds + * Wait for the result of pg_stop_backup(), but no longer than + * archive_timeout seconds */ if (pg_stop_backup_is_sent && !in_cleanup) { + res = NULL; + while (1) { - if (!PQconsumeInput(conn) || PQisBusy(conn)) + if (!PQconsumeInput(conn)) + elog(ERROR, "pg_stop backup() failed: %s", + PQerrorMessage(conn)); + + if (PQisBusy(conn)) { pg_stop_backup_timeout++; sleep(1); @@ -1757,14 +1683,14 @@ pg_stop_backup(pgBackup *backup) elog(INFO, "wait for pg_stop_backup()"); /* - * If postgres haven't answered in PG_STOP_BACKUP_TIMEOUT seconds, + * If postgres haven't answered in archive_timeout seconds, * send an interrupt. */ - if (pg_stop_backup_timeout > PG_STOP_BACKUP_TIMEOUT) + if (pg_stop_backup_timeout > instance_config.archive_timeout) { pgut_cancel(conn); elog(ERROR, "pg_stop_backup doesn't answer in %d seconds, cancel it", - PG_STOP_BACKUP_TIMEOUT); + instance_config.archive_timeout); } } else @@ -1781,8 +1707,11 @@ pg_stop_backup(pgBackup *backup) { switch (PQresultStatus(res)) { + /* + * We should expect only PGRES_TUPLES_OK since pg_stop_backup + * returns tuples. 
+ */ case PGRES_TUPLES_OK: - case PGRES_COMMAND_OK: break; default: elog(ERROR, "query failed: %s query was: %s", @@ -1793,38 +1722,159 @@ pg_stop_backup(pgBackup *backup) backup_in_progress = false; +// char *target_lsn = "2/F578A000"; +// XLogDataFromLSN(target_lsn, &lsn_hi, &lsn_lo); + /* Extract timeline and LSN from results of pg_stop_backup() */ - XLogDataFromLSN(PQgetvalue(res, 0, 2), &xlogid, &xrecoff); + XLogDataFromLSN(PQgetvalue(res, 0, 2), &lsn_hi, &lsn_lo); /* Calculate LSN */ - stop_backup_lsn = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; + stop_backup_lsn_tmp = ((uint64) lsn_hi) << 32 | lsn_lo; - if (!XRecOffIsValid(stop_backup_lsn)) + /* It is ok for replica to return invalid STOP LSN + * UPD: Apparently it is ok even for a master. + */ + if (!XRecOffIsValid(stop_backup_lsn_tmp)) { - stop_backup_lsn = restore_lsn; + char *xlog_path, + stream_xlog_path[MAXPGPATH]; + XLogSegNo segno = 0; + XLogRecPtr lsn_tmp = InvalidXLogRecPtr; + + /* + * Even though the value is invalid, it's expected postgres behaviour + * and we're trying to fix it below. + */ + elog(LOG, "Invalid offset in stop_lsn value %X/%X, trying to fix", + (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp)); + + /* + * Note: even with gdb it is very hard to produce automated tests for + * contrecord + invalid LSN, so emulate it for manual testing. + */ + //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE; + //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X", + // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp)); + + if (stream_wal) + { + pgBackupGetPath2(backup, stream_xlog_path, + lengthof(stream_xlog_path), + DATABASE_DIR, PG_XLOG_DIR); + xlog_path = stream_xlog_path; + } + else + xlog_path = arclog_path; + + GetXLogSegNo(stop_backup_lsn_tmp, segno, instance_config.xlog_seg_size); + + /* + * Note, that there is no guarantee that corresponding WAL file even exists. + * Replica may return LSN from future and keep staying in present. + * Or it can return invalid LSN. + * + * That's bad, since we want to get real LSN to save it in backup label file + * and to use it in WAL validation. + * + * So we try to do the following: + * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and + * look for the first valid record in it. + * It solves the problem of occasional invalid LSN on write-busy system. + * 2. Failing that, look for record in previous segment with endpoint + * equal or greater than stop_lsn. It may(!) solve the problem of invalid LSN + * on write-idle system. If that fails too, error out. 
+ */ + + /* stop_lsn is pointing to a 0 byte of xlog segment */ + if (stop_backup_lsn_tmp % instance_config.xlog_seg_size == 0) + { + /* Wait for segment with current stop_lsn, it is ok for it to never arrive */ + wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, + false, true, WARNING, stream_wal); + + /* Get the first record in segment with current stop_lsn */ + lsn_tmp = get_first_record_lsn(xlog_path, segno, backup->tli, + instance_config.xlog_seg_size, + instance_config.archive_timeout); + + /* Check that returned LSN is valid and greater than stop_lsn */ + if (XLogRecPtrIsInvalid(lsn_tmp) || + !XRecOffIsValid(lsn_tmp) || + lsn_tmp < stop_backup_lsn_tmp) + { + /* Backup from master should error out here */ + if (!backup->from_replica) + elog(ERROR, "Failed to get next WAL record after %X/%X", + (uint32) (stop_backup_lsn_tmp >> 32), + (uint32) (stop_backup_lsn_tmp)); + + /* No luck, falling back to looking up for previous record */ + elog(WARNING, "Failed to get next WAL record after %X/%X, " + "looking for previous WAL record", + (uint32) (stop_backup_lsn_tmp >> 32), + (uint32) (stop_backup_lsn_tmp)); + + /* Despite looking for previous record there is not guarantee of success + * because previous record can be the contrecord. + */ + lsn_tmp = wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, + true, false, ERROR, stream_wal); + + /* sanity */ + if (!XRecOffIsValid(lsn_tmp) || XLogRecPtrIsInvalid(lsn_tmp)) + elog(ERROR, "Failed to get WAL record prior to %X/%X", + (uint32) (stop_backup_lsn_tmp >> 32), + (uint32) (stop_backup_lsn_tmp)); + } + } + /* stop lsn is aligned to xlog block size, just find next lsn */ + else if (stop_backup_lsn_tmp % XLOG_BLCKSZ == 0) + { + /* Wait for segment with current stop_lsn */ + wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, + false, true, ERROR, stream_wal); + + /* Get the next closest record in segment with current stop_lsn */ + lsn_tmp = get_next_record_lsn(xlog_path, segno, backup->tli, + instance_config.xlog_seg_size, + instance_config.archive_timeout, + stop_backup_lsn_tmp); + + /* sanity */ + if (!XRecOffIsValid(lsn_tmp) || XLogRecPtrIsInvalid(lsn_tmp)) + elog(ERROR, "Failed to get WAL record next to %X/%X", + (uint32) (stop_backup_lsn_tmp >> 32), + (uint32) (stop_backup_lsn_tmp)); + } + /* PostgreSQL returned something very illegal as STOP_LSN, error out */ + else + elog(ERROR, "Invalid stop_backup_lsn value %X/%X", + (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp)); + + /* Setting stop_backup_lsn will set stop point for streaming */ + stop_backup_lsn = lsn_tmp; + stop_lsn_exists = true; } - if (!XRecOffIsValid(stop_backup_lsn)) - elog(ERROR, "Invalid stop_backup_lsn value %X/%X", - (uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn)); + elog(LOG, "stop_lsn: %X/%X", + (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp)); /* Write backup_label and tablespace_map */ if (!exclusive_backup) { Assert(PQnfields(res) >= 4); - pgBackupGetPath(¤t, path, lengthof(path), DATABASE_DIR); + pgBackupGetPath(backup, path, lengthof(path), DATABASE_DIR); /* Write backup_label */ join_path_components(backup_label, path, PG_BACKUP_LABEL_FILE); - fp = fopen(backup_label, PG_BINARY_W); + fp = fio_fopen(backup_label, PG_BINARY_W, FIO_BACKUP_HOST); if (fp == NULL) elog(ERROR, "can't open backup label file \"%s\": %s", backup_label, strerror(errno)); len = strlen(PQgetvalue(res, 0, 3)); - if (fwrite(PQgetvalue(res, 0, 3), 1, len, fp) != len || - fflush(fp) != 0 || - fsync(fileno(fp)) != 0 || - fclose(fp)) + 
if (fio_fwrite(fp, PQgetvalue(res, 0, 3), len) != len || + fio_fflush(fp) != 0 || + fio_fclose(fp)) elog(ERROR, "can't write backup label file \"%s\": %s", backup_label, strerror(errno)); @@ -1834,10 +1884,13 @@ pg_stop_backup(pgBackup *backup) */ if (backup_files_list) { - file = pgFileNew(backup_label, true); - calc_file_checksum(file); - free(file->path); - file->path = strdup(PG_BACKUP_LABEL_FILE); + file = pgFileNew(backup_label, PG_BACKUP_LABEL_FILE, true, 0, + FIO_BACKUP_HOST); + + file->crc = pgFileGetCRC(backup_label, true, false); + + file->write_size = file->size; + file->uncompressed_size = file->size; parray_append(backup_files_list, file); } } @@ -1863,26 +1916,28 @@ pg_stop_backup(pgBackup *backup) char tablespace_map[MAXPGPATH]; join_path_components(tablespace_map, path, PG_TABLESPACE_MAP_FILE); - fp = fopen(tablespace_map, PG_BINARY_W); + fp = fio_fopen(tablespace_map, PG_BINARY_W, FIO_BACKUP_HOST); if (fp == NULL) elog(ERROR, "can't open tablespace map file \"%s\": %s", tablespace_map, strerror(errno)); len = strlen(val); - if (fwrite(val, 1, len, fp) != len || - fflush(fp) != 0 || - fsync(fileno(fp)) != 0 || - fclose(fp)) + if (fio_fwrite(fp, val, len) != len || + fio_fflush(fp) != 0 || + fio_fclose(fp)) elog(ERROR, "can't write tablespace map file \"%s\": %s", tablespace_map, strerror(errno)); if (backup_files_list) { - file = pgFileNew(tablespace_map, true); + file = pgFileNew(tablespace_map, PG_TABLESPACE_MAP_FILE, true, 0, + FIO_BACKUP_HOST); if (S_ISREG(file->mode)) - calc_file_checksum(file); - free(file->path); - file->path = strdup(PG_TABLESPACE_MAP_FILE); + { + file->crc = pgFileGetCRC(tablespace_map, true, false); + file->write_size = file->size; + } + parray_append(backup_files_list, file); } } @@ -1890,14 +1945,6 @@ pg_stop_backup(pgBackup *backup) if (tablespace_map_content) PQclear(tablespace_map_content); PQclear(res); - - if (stream_wal) - { - /* Wait for the completion of stream */ - pthread_join(stream_thread, NULL); - if (stream_thread_arg.ret == 1) - elog(ERROR, "WAL streaming failed"); - } } /* Fill in fields if that is the correct end of backup. */ @@ -1906,17 +1953,22 @@ pg_stop_backup(pgBackup *backup) char *xlog_path, stream_xlog_path[MAXPGPATH]; - /* Wait for stop_lsn to be received by replica */ - if (backup->from_replica) - wait_replica_wal_lsn(stop_backup_lsn, false); /* * Wait for stop_lsn to be archived or streamed. - * We wait for stop_lsn in stream mode just in case. + * If replica returned valid STOP_LSN of not actually existing record, + * look for previous record with endpoint >= STOP_LSN. 
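/*
 * Illustrative sketch (standalone, not from the pg_probackup sources): the
 * write-flush-close error-handling pattern used above when materializing
 * backup_label and tablespace_map in the backup directory.  The real code
 * goes through the fio_* wrappers so the same path works for remote
 * backups; file name and contents below are placeholders.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>

static void
write_small_file(const char *path, const char *content)
{
    FILE   *fp = fopen(path, "wb");
    size_t  len = strlen(content);

    if (fp == NULL)
    {
        fprintf(stderr, "can't open \"%s\": %s\n", path, strerror(errno));
        return;
    }

    /* Mirror the checks above: every step of the write can fail */
    if (fwrite(content, 1, len, fp) != len ||
        fflush(fp) != 0 ||
        fclose(fp) != 0)
        fprintf(stderr, "can't write \"%s\": %s\n", path, strerror(errno));
}

int
main(void)
{
    write_small_file("backup_label.sample",
                     "START WAL LOCATION: 2/F578A000 (file 0000000100000002000000F5)\n");
    return 0;
}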
*/ - wait_wal_lsn(stop_backup_lsn, false); + if (!stop_lsn_exists) + stop_backup_lsn = wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, + false, false, ERROR, stream_wal); if (stream_wal) { + /* Wait for the completion of stream */ + pthread_join(stream_thread, NULL); + if (stream_thread_arg.ret == 1) + elog(ERROR, "WAL streaming failed"); + pgBackupGetPath2(backup, stream_xlog_path, lengthof(stream_xlog_path), DATABASE_DIR, PG_XLOG_DIR); @@ -1925,26 +1977,28 @@ pg_stop_backup(pgBackup *backup) else xlog_path = arclog_path; - backup->tli = get_current_timeline(false); backup->stop_lsn = stop_backup_lsn; + backup->recovery_xid = recovery_xid; elog(LOG, "Getting the Recovery Time from WAL"); + /* iterate over WAL from stop_backup lsn to start_backup lsn */ if (!read_recovery_info(xlog_path, backup->tli, + instance_config.xlog_seg_size, backup->start_lsn, backup->stop_lsn, - &backup->recovery_time, &backup->recovery_xid)) + &backup->recovery_time)) { + elog(LOG, "Failed to find Recovery Time in WAL, forced to trust current_timestamp"); backup->recovery_time = recovery_time; - backup->recovery_xid = recovery_xid; } } } /* - * Retreive checkpoint_timeout GUC value in seconds. + * Retrieve checkpoint_timeout GUC value in seconds. */ static int -checkpoint_timeout(void) +checkpoint_timeout(PGconn *backup_conn) { PGresult *res; const char *val; @@ -1987,30 +2041,10 @@ backup_cleanup(bool fatal, void *userdata) base36enc(current.start_time)); current.end_time = time(NULL); current.status = BACKUP_STATUS_ERROR; - pgBackupWriteBackupControlFile(¤t); - } - - /* - * If backup is in progress, notify stop of backup to PostgreSQL - */ - if (backup_in_progress) - { - elog(WARNING, "backup in progress, stop backup"); - pg_stop_backup(NULL); /* don't care stop_lsn on error case */ + write_backup(¤t, true); } } -/* - * Disconnect backup connection during quit pg_probackup. - */ -static void -backup_disconnect(bool fatal, void *userdata) -{ - pgut_disconnect(backup_conn); - if (master_conn) - pgut_disconnect(master_conn); -} - /* * Take a backup of the PGDATA at a file level. * Copy all directories and files listed in backup_files_list. @@ -2023,117 +2057,134 @@ static void * backup_files(void *arg) { int i; + char from_fullpath[MAXPGPATH]; + char to_fullpath[MAXPGPATH]; + static time_t prev_time; + backup_files_arg *arguments = (backup_files_arg *) arg; - int n_backup_files_list = parray_num(arguments->files_list); + int n_backup_files_list = parray_num(arguments->files_list); + + prev_time = current.start_time; /* backup a file */ for (i = 0; i < n_backup_files_list; i++) { - int ret; - struct stat buf; - pgFile *file = (pgFile *) parray_get(arguments->files_list, i); + pgFile *file = (pgFile *) parray_get(arguments->files_list, i); + pgFile *prev_file = NULL; + + /* We have already copied all directories */ + if (S_ISDIR(file->mode)) + continue; + + if (arguments->thread_num == 1) + { + /* update backup_content.control every 60 seconds */ + if ((difftime(time(NULL), prev_time)) > 60) + { + write_backup_filelist(¤t, arguments->files_list, arguments->from_root, + arguments->external_dirs, false); + /* update backup control file to update size info */ + write_backup(¤t, true); + + prev_time = time(NULL); + } + } - elog(VERBOSE, "Copying file: \"%s\" ", file->path); if (!pg_atomic_test_set_flag(&file->lock)) continue; /* check for interrupt */ - if (interrupted) + if (interrupted || thread_interrupted) elog(ERROR, "interrupted during backup"); if (progress) - elog(LOG, "Progress: (%d/%d). 
Process file \"%s\"", - i + 1, n_backup_files_list, file->path); + elog(INFO, "Progress: (%d/%d). Process file \"%s\"", + i + 1, n_backup_files_list, file->rel_path); - /* stat file to check its current state */ - ret = stat(file->path, &buf); - if (ret == -1) + /* Handle zero sized files */ + if (file->size == 0) { - if (errno == ENOENT) - { - /* - * If file is not found, this is not en error. - * It could have been deleted by concurrent postgres transaction. - */ - file->write_size = BYTES_INVALID; - elog(LOG, "File \"%s\" is not found", file->path); - continue; - } - else - { - elog(ERROR, - "can't stat file to backup \"%s\": %s", - file->path, strerror(errno)); - } - } - - /* We have already copied all directories */ - if (S_ISDIR(buf.st_mode)) + file->write_size = 0; continue; + } - if (S_ISREG(buf.st_mode)) + /* construct destination filepath */ + if (file->external_dir_num == 0) { - /* Check that file exist in previous backup */ - if (current.backup_mode != BACKUP_MODE_FULL) - { - char *relative; - pgFile key; - pgFile **prev_file; - - relative = GetRelativePath(file->path, arguments->from_root); - key.path = relative; - - prev_file = (pgFile **) parray_bsearch(arguments->prev_filelist, - &key, pgFileComparePath); - if (prev_file) - /* File exists in previous backup */ - file->exists_in_prev = true; - } - /* copy the file into backup */ - if (file->is_datafile && !file->is_cfs) - { - char to_path[MAXPGPATH]; + join_path_components(from_fullpath, arguments->from_root, file->rel_path); + join_path_components(to_fullpath, arguments->to_root, file->rel_path); + } + else + { + char external_dst[MAXPGPATH]; + char *external_path = parray_get(arguments->external_dirs, + file->external_dir_num - 1); - join_path_components(to_path, arguments->to_root, - file->path + strlen(arguments->from_root) + 1); + makeExternalDirPathByNum(external_dst, + arguments->external_prefix, + file->external_dir_num); - /* backup block by block if datafile AND not compressed by cfs*/ - if (!backup_data_file(arguments, to_path, file, - arguments->prev_start_lsn, - current.backup_mode, - compress_alg, compress_level)) - { - file->write_size = BYTES_INVALID; - elog(VERBOSE, "File \"%s\" was not copied to backup", file->path); - continue; - } - } - /* TODO: - * Check if file exists in previous backup - * If exists: - * if mtime > start_backup_time of parent backup, - * copy file to backup - * if mtime < start_backup_time - * calculate crc, compare crc to old file - * if crc is the same -> skip file - */ - else if (!copy_file(arguments->from_root, arguments->to_root, file)) + join_path_components(to_fullpath, external_dst, file->rel_path); + join_path_components(from_fullpath, external_path, file->rel_path); + } + + /* Encountered some strange beast */ + if (!S_ISREG(file->mode)) + elog(WARNING, "Unexpected type %d of file \"%s\", skipping", + file->mode, from_fullpath); + + /* Check that file exist in previous backup */ + if (current.backup_mode != BACKUP_MODE_FULL) + { + pgFile **prev_file_tmp = NULL; + prev_file_tmp = (pgFile **) parray_bsearch(arguments->prev_filelist, + file, pgFileCompareRelPathWithExternal); + if (prev_file_tmp) { - file->write_size = BYTES_INVALID; - elog(VERBOSE, "File \"%s\" was not copied to backup", file->path); - continue; + /* File exists in previous backup */ + file->exists_in_prev = true; + prev_file = *prev_file_tmp; } + } - elog(VERBOSE, "File \"%s\". 
Copied "INT64_FORMAT " bytes", - file->path, file->write_size); + /* backup file */ + if (file->is_datafile && !file->is_cfs) + { + backup_data_file(&(arguments->conn_arg), file, from_fullpath, to_fullpath, + arguments->prev_start_lsn, + current.backup_mode, + instance_config.compress_alg, + instance_config.compress_level, + arguments->nodeInfo->checksum_version, + arguments->nodeInfo->ptrack_version_num, + arguments->nodeInfo->ptrack_schema, + arguments->hdr_map, false); } else - elog(LOG, "unexpected file type %d", buf.st_mode); + { + backup_non_data_file(file, prev_file, from_fullpath, to_fullpath, + current.backup_mode, current.parent_backup, true); + } + + if (file->write_size == FILE_NOT_FOUND) + continue; + + if (file->write_size == BYTES_INVALID) + { + elog(VERBOSE, "Skipping the unchanged file: \"%s\"", from_fullpath); + continue; + } + + elog(VERBOSE, "File \"%s\". Copied "INT64_FORMAT " bytes", + from_fullpath, file->write_size); } + /* ssh connection to longer needed */ + fio_disconnect(); + /* Close connection */ - if (arguments->backup_conn) - pgut_disconnect(arguments->backup_conn); + if (arguments->conn_arg.conn) + pgut_disconnect(arguments->conn_arg.conn); /* Data files transferring is successful */ arguments->ret = 0; @@ -2148,8 +2199,8 @@ backup_files(void *arg) * - set flags for database directories * - set flags for datafiles */ -static void -parse_backup_filelist_filenames(parray *files, const char *root) +void +parse_filelist_filenames(parray *files, const char *root) { size_t i = 0; Oid unlogged_file_reloid = 0; @@ -2157,13 +2208,10 @@ parse_backup_filelist_filenames(parray *files, const char *root) while (i < parray_num(files)) { pgFile *file = (pgFile *) parray_get(files, i); - char *relative; int sscanf_result; - relative = GetRelativePath(file->path, root); - if (S_ISREG(file->mode) && - path_is_prefix_of_path(PG_TBLSPC_DIR, relative)) + path_is_prefix_of_path(PG_TBLSPC_DIR, file->rel_path)) { /* * Found file in pg_tblspc/tblsOid/TABLESPACE_VERSION_DIRECTORY @@ -2178,20 +2226,21 @@ parse_backup_filelist_filenames(parray *files, const char *root) * Check that the file is located under * TABLESPACE_VERSION_DIRECTORY */ - sscanf_result = sscanf(relative, PG_TBLSPC_DIR "/%u/%s/%u", + sscanf_result = sscanf(file->rel_path, PG_TBLSPC_DIR "/%u/%s/%u", &tblspcOid, tmp_rel_path, &dbOid); /* Yes, it is */ if (sscanf_result == 2 && - strcmp(tmp_rel_path, TABLESPACE_VERSION_DIRECTORY) == 0) - set_cfs_datafiles(files, root, relative, i); + strncmp(tmp_rel_path, TABLESPACE_VERSION_DIRECTORY, + strlen(TABLESPACE_VERSION_DIRECTORY)) == 0) + set_cfs_datafiles(files, root, file->rel_path, i); } } if (S_ISREG(file->mode) && file->tblspcOid != 0 && file->name && file->name[0]) { - if (strcmp(file->forkName, "init") == 0) + if (file->forkName == init) { /* * Do not backup files of unlogged relations. 
@@ -2242,7 +2291,6 @@ set_cfs_datafiles(parray *files, const char *root, char *relative, size_t i) int p; pgFile *prev_file; char *cfs_tblspc_path; - char *relative_prev_file; cfs_tblspc_path = strdup(relative); if(!cfs_tblspc_path) @@ -2254,22 +2302,21 @@ set_cfs_datafiles(parray *files, const char *root, char *relative, size_t i) for (p = (int) i; p >= 0; p--) { prev_file = (pgFile *) parray_get(files, (size_t) p); - relative_prev_file = GetRelativePath(prev_file->path, root); - elog(VERBOSE, "Checking file in cfs tablespace %s", relative_prev_file); + elog(VERBOSE, "Checking file in cfs tablespace %s", prev_file->rel_path); - if (strstr(relative_prev_file, cfs_tblspc_path) != NULL) + if (strstr(prev_file->rel_path, cfs_tblspc_path) != NULL) { if (S_ISREG(prev_file->mode) && prev_file->is_datafile) { elog(VERBOSE, "Setting 'is_cfs' on file %s, name %s", - relative_prev_file, prev_file->name); + prev_file->rel_path, prev_file->name); prev_file->is_cfs = true; } } else { - elog(VERBOSE, "Breaking on %s", relative_prev_file); + elog(VERBOSE, "Breaking on %s", prev_file->rel_path); break; } } @@ -2283,7 +2330,7 @@ set_cfs_datafiles(parray *files, const char *root, char *relative, size_t i) void process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno) { - char *path; +// char *path; char *rel_path; BlockNumber blkno_inseg; int segno; @@ -2295,16 +2342,15 @@ process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno) rel_path = relpathperm(rnode, forknum); if (segno > 0) - path = psprintf("%s/%s.%u", pgdata, rel_path, segno); + f.rel_path = psprintf("%s.%u", rel_path, segno); else - path = psprintf("%s/%s", pgdata, rel_path); + f.rel_path = rel_path; - pg_free(rel_path); + f.external_dir_num = 0; - f.path = path; /* backup_files_list should be sorted before */ file_item = (pgFile **) parray_bsearch(backup_files_list, &f, - pgFileComparePath); + pgFileCompareRelPathWithExternal); /* * If we don't have any record of this file in the file map, it means @@ -2324,137 +2370,9 @@ process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno) pthread_mutex_unlock(&backup_pagemap_mutex); } - pg_free(path); -} - -/* - * Given a list of files in the instance to backup, build a pagemap for each - * data file that has ptrack. Result is saved in the pagemap field of pgFile. - * NOTE we rely on the fact that provided parray is sorted by file->path. - */ -static void -make_pagemap_from_ptrack(parray *files) -{ - size_t i; - Oid dbOid_with_ptrack_init = 0; - Oid tblspcOid_with_ptrack_init = 0; - char *ptrack_nonparsed = NULL; - size_t ptrack_nonparsed_size = 0; - - elog(LOG, "Compiling pagemap"); - for (i = 0; i < parray_num(files); i++) - { - pgFile *file = (pgFile *) parray_get(files, i); - size_t start_addr; - - /* - * If there is a ptrack_init file in the database, - * we must backup all its files, ignoring ptrack files for relations. - */ - if (file->is_database) - { - char *filename = strrchr(file->path, '/'); - - Assert(filename != NULL); - filename++; - - /* - * The function pg_ptrack_get_and_clear_db returns true - * if there was a ptrack_init file. - * Also ignore ptrack files for global tablespace, - * to avoid any possible specific errors. 
- */ - if ((file->tblspcOid == GLOBALTABLESPACE_OID) || - pg_ptrack_get_and_clear_db(file->dbOid, file->tblspcOid)) - { - dbOid_with_ptrack_init = file->dbOid; - tblspcOid_with_ptrack_init = file->tblspcOid; - } - } - - if (file->is_datafile) - { - if (file->tblspcOid == tblspcOid_with_ptrack_init && - file->dbOid == dbOid_with_ptrack_init) - { - /* ignore ptrack if ptrack_init exists */ - elog(VERBOSE, "Ignoring ptrack because of ptrack_init for file: %s", file->path); - file->pagemap_isabsent = true; - continue; - } - - /* get ptrack bitmap once for all segments of the file */ - if (file->segno == 0) - { - /* release previous value */ - pg_free(ptrack_nonparsed); - ptrack_nonparsed_size = 0; - - ptrack_nonparsed = pg_ptrack_get_and_clear(file->tblspcOid, file->dbOid, - file->relOid, &ptrack_nonparsed_size); - } - - if (ptrack_nonparsed != NULL) - { - /* - * pg_ptrack_get_and_clear() returns ptrack with VARHDR cutted out. - * Compute the beginning of the ptrack map related to this segment - * - * HEAPBLOCKS_PER_BYTE. Number of heap pages one ptrack byte can track: 8 - * RELSEG_SIZE. Number of Pages per segment: 131072 - * RELSEG_SIZE/HEAPBLOCKS_PER_BYTE. number of bytes in ptrack file needed - * to keep track on one relsegment: 16384 - */ - start_addr = (RELSEG_SIZE/HEAPBLOCKS_PER_BYTE)*file->segno; - - /* - * If file segment was created after we have read ptrack, - * we won't have a bitmap for this segment. - */ - if (start_addr > ptrack_nonparsed_size) - { - elog(VERBOSE, "Ptrack is missing for file: %s", file->path); - file->pagemap_isabsent = true; - } - else - { - - if (start_addr + RELSEG_SIZE/HEAPBLOCKS_PER_BYTE > ptrack_nonparsed_size) - { - file->pagemap.bitmapsize = ptrack_nonparsed_size - start_addr; - elog(VERBOSE, "pagemap size: %i", file->pagemap.bitmapsize); - } - else - { - file->pagemap.bitmapsize = RELSEG_SIZE/HEAPBLOCKS_PER_BYTE; - elog(VERBOSE, "pagemap size: %i", file->pagemap.bitmapsize); - } - - file->pagemap.bitmap = pg_malloc(file->pagemap.bitmapsize); - memcpy(file->pagemap.bitmap, ptrack_nonparsed+start_addr, file->pagemap.bitmapsize); - } - } - else - { - /* - * If ptrack file is missing, try to copy the entire file. - * It can happen in two cases: - * - files were created by commands that bypass buffer manager - * and, correspondingly, ptrack mechanism. - * i.e. CREATE DATABASE - * - target relation was deleted. - */ - elog(VERBOSE, "Ptrack is missing for file: %s", file->path); - file->pagemap_isabsent = true; - } - } - } - elog(LOG, "Pagemap compiled"); -// res = pgut_execute(backup_conn, "SET client_min_messages = warning;", 0, NULL, true); -// PQclear(pgut_execute(backup_conn, "CHECKPOINT;", 0, NULL, true)); + pg_free(rel_path); } - /* * Stop WAL streaming if current 'xlogpos' exceeds 'stop_backup_lsn', which is * set by pg_stop_backup(). 
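/*
 * Illustrative sketch (standalone, not from the pg_probackup sources): the
 * page-map idea behind process_block_change() above.  An incremental backup
 * only needs a bitmap with one bit per block of a relation segment, set
 * whenever WAL reports a change to that block.  The layout here is
 * simplified; the real code uses datapagemap from src/datapagemap.c and the
 * block number is an example value.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RELSEG_SIZE     131072      /* blocks per 1GB segment with 8kB pages */

typedef struct
{
    uint8_t     bitmap[RELSEG_SIZE / 8];
} block_map;

static void
block_map_set(block_map *map, uint32_t blkno_inseg)
{
    map->bitmap[blkno_inseg / 8] |= (uint8_t) (1 << (blkno_inseg % 8));
}

static int
block_map_is_set(const block_map *map, uint32_t blkno_inseg)
{
    return (map->bitmap[blkno_inseg / 8] >> (blkno_inseg % 8)) & 1;
}

int
main(void)
{
    block_map   map;
    uint32_t    blkno = 424242;                     /* sample absolute block number */
    uint32_t    segno = blkno / RELSEG_SIZE;        /* which 1GB segment file */
    uint32_t    blkno_inseg = blkno % RELSEG_SIZE;  /* block within that file */

    memset(&map, 0, sizeof(map));
    block_map_set(&map, blkno_inseg);

    printf("block %u -> segment %u, block-in-segment %u, marked: %d\n",
           blkno, segno, blkno_inseg, block_map_is_set(&map, blkno_inseg));
    return 0;
}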
@@ -2466,8 +2384,8 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished) static XLogRecPtr prevpos = InvalidXLogRecPtr; /* check for interrupt */ - if (interrupted) - elog(ERROR, "Interrupted during backup"); + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during WAL streaming"); /* we assume that we get called once at the end of each segment */ if (segment_finished) @@ -2488,21 +2406,18 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished) if (!XLogRecPtrIsInvalid(stop_backup_lsn)) { - if (xlogpos > stop_backup_lsn) + if (xlogpos >= stop_backup_lsn) { stop_stream_lsn = xlogpos; return true; } /* pg_stop_backup() was executed, wait for the completion of stream */ - if (stream_stop_timeout == 0) + if (stream_stop_begin == 0) { elog(INFO, "Wait for LSN %X/%X to be streamed", (uint32) (stop_backup_lsn >> 32), (uint32) stop_backup_lsn); - stream_stop_timeout = checkpoint_timeout(); - stream_stop_timeout = stream_stop_timeout + stream_stop_timeout * 0.1; - stream_stop_begin = time(NULL); } @@ -2524,30 +2439,36 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished) static void * StreamLog(void *arg) { - XLogRecPtr startpos; - TimeLineID starttli; StreamThreadArg *stream_arg = (StreamThreadArg *) arg; - /* - * We must use startpos as start_lsn from start_backup - */ - startpos = current.start_lsn; - starttli = current.tli; - /* * Always start streaming at the beginning of a segment */ - startpos -= startpos % XLOG_SEG_SIZE; + stream_arg->startpos -= stream_arg->startpos % instance_config.xlog_seg_size; /* Initialize timeout */ - stream_stop_timeout = 0; stream_stop_begin = 0; +#if PG_VERSION_NUM >= 100000 + /* if slot name was not provided for temp slot, use default slot name */ + if (!replication_slot && temp_slot) + replication_slot = "pg_probackup_slot"; +#endif + + +#if PG_VERSION_NUM >= 110000 + /* Create temp repslot */ + if (temp_slot) + CreateReplicationSlot(stream_arg->conn, replication_slot, + NULL, temp_slot, true, true, false); +#endif + /* * Start the replication */ - elog(LOG, _("started streaming WAL at %X/%X (timeline %u)"), - (uint32) (startpos >> 32), (uint32) startpos, starttli); + elog(LOG, "started streaming WAL at %X/%X (timeline %u)", + (uint32) (stream_arg->startpos >> 32), (uint32) stream_arg->startpos, + stream_arg->starttli); #if PG_VERSION_NUM >= 90600 { @@ -2555,14 +2476,17 @@ StreamLog(void *arg) MemSet(&ctl, 0, sizeof(ctl)); - ctl.startpos = startpos; - ctl.timeline = starttli; + ctl.startpos = stream_arg->startpos; + ctl.timeline = stream_arg->starttli; ctl.sysidentifier = NULL; #if PG_VERSION_NUM >= 100000 ctl.walmethod = CreateWalDirectoryMethod(stream_arg->basedir, 0, true); ctl.replication_slot = replication_slot; ctl.stop_socket = PGINVALID_SOCKET; +#if PG_VERSION_NUM >= 100000 && PG_VERSION_NUM < 110000 + ctl.temp_slot = temp_slot; +#endif #else ctl.basedir = (char *) stream_arg->basedir; #endif @@ -2583,14 +2507,14 @@ StreamLog(void *arg) #endif } #else - if(ReceiveXlogStream(stream_arg->conn, startpos, starttli, NULL, - (char *) stream_arg->basedir, stop_streaming, - standby_message_timeout, NULL, false, false) == false) + if(ReceiveXlogStream(stream_arg->conn, stream_arg->startpos, stream_arg->starttli, + NULL, (char *) stream_arg->basedir, stop_streaming, + standby_message_timeout, NULL, false, false) == false) elog(ERROR, "Problem in receivexlog"); #endif - elog(LOG, _("finished streaming WAL at %X/%X (timeline %u)"), - (uint32) (stop_stream_lsn >> 32), (uint32) 
stop_stream_lsn, starttli); + elog(LOG, "finished streaming WAL at %X/%X (timeline %u)", + (uint32) (stop_stream_lsn >> 32), (uint32) stop_stream_lsn, stream_arg->starttli); stream_arg->ret = 0; PQfinish(stream_arg->conn); @@ -2599,92 +2523,126 @@ StreamLog(void *arg) return NULL; } -/* - * Get lsn of the moment when ptrack was enabled the last time. - */ -static XLogRecPtr -get_last_ptrack_lsn(void) - +static void +check_external_for_tablespaces(parray *external_list, PGconn *backup_conn) { PGresult *res; - uint32 xlogid; - uint32 xrecoff; - XLogRecPtr lsn; + int i = 0; + int j = 0; + char *tablespace_path = NULL; + char *query = "SELECT pg_catalog.pg_tablespace_location(oid) " + "FROM pg_catalog.pg_tablespace " + "WHERE pg_catalog.pg_tablespace_location(oid) <> '';"; - res = pgut_execute(backup_conn, "select pg_catalog.pg_ptrack_control_lsn()", 0, NULL); + res = pgut_execute(backup_conn, query, 0, NULL); - /* Extract timeline and LSN from results of pg_start_backup() */ - XLogDataFromLSN(PQgetvalue(res, 0, 0), &xlogid, &xrecoff); - /* Calculate LSN */ - lsn = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; + /* Check successfull execution of query */ + if (!res) + elog(ERROR, "Failed to get list of tablespaces"); - PQclear(res); - return lsn; -} + for (i = 0; i < res->ntups; i++) + { + tablespace_path = PQgetvalue(res, i, 0); + Assert (strlen(tablespace_path) > 0); -char * -pg_ptrack_get_block(backup_files_arg *arguments, - Oid dbOid, - Oid tblsOid, - Oid relOid, - BlockNumber blknum, - size_t *result_size) -{ - PGresult *res; - char *params[4]; - char *result; + canonicalize_path(tablespace_path); - params[0] = palloc(64); - params[1] = palloc(64); - params[2] = palloc(64); - params[3] = palloc(64); + for (j = 0; j < parray_num(external_list); j++) + { + char *external_path = parray_get(external_list, j); + + if (path_is_prefix_of_path(external_path, tablespace_path)) + elog(ERROR, "External directory path (-E option) \"%s\" " + "contains tablespace \"%s\"", + external_path, tablespace_path); + if (path_is_prefix_of_path(tablespace_path, external_path)) + elog(WARNING, "External directory path (-E option) \"%s\" " + "is in tablespace directory \"%s\"", + tablespace_path, external_path); + } + } + PQclear(res); - /* - * Use tmp_conn, since we may work in parallel threads. - * We can connect to any database. 
- */ - sprintf(params[0], "%i", tblsOid); - sprintf(params[1], "%i", dbOid); - sprintf(params[2], "%i", relOid); - sprintf(params[3], "%u", blknum); + /* Check that external directories do not overlap */ + if (parray_num(external_list) < 2) + return; - if (arguments->backup_conn == NULL) + for (i = 0; i < parray_num(external_list); i++) { - arguments->backup_conn = pgut_connect(pgut_dbname); + char *external_path = parray_get(external_list, i); + + for (j = 0; j < parray_num(external_list); j++) + { + char *tmp_external_path = parray_get(external_list, j); + + /* skip yourself */ + if (j == i) + continue; + + if (path_is_prefix_of_path(external_path, tmp_external_path)) + elog(ERROR, "External directory path (-E option) \"%s\" " + "contain another external directory \"%s\"", + external_path, tmp_external_path); + + } } +} - if (arguments->cancel_conn == NULL) - arguments->cancel_conn = PQgetCancel(arguments->backup_conn); +/* + * Run IDENTIFY_SYSTEM through a given connection and + * check system identifier and timeline are matching + */ +void +IdentifySystem(StreamThreadArg *stream_thread_arg) +{ + PGresult *res; - //elog(LOG, "db %i pg_ptrack_get_block(%i, %i, %u)",dbOid, tblsOid, relOid, blknum); - res = pgut_execute_parallel(arguments->backup_conn, - arguments->cancel_conn, - "SELECT pg_catalog.pg_ptrack_get_block_2($1, $2, $3, $4)", - 4, (const char **)params, true); + uint64 stream_conn_sysidentifier = 0; + char *stream_conn_sysidentifier_str; + TimeLineID stream_conn_tli = 0; - if (PQnfields(res) != 1) + if (!CheckServerVersionForStreaming(stream_thread_arg->conn)) { - elog(VERBOSE, "cannot get file block for relation oid %u", - relOid); - return NULL; + PQfinish(stream_thread_arg->conn); + /* + * Error message already written in CheckServerVersionForStreaming(). + * There's no hope of recovering from a version mismatch, so don't + * retry. + */ + elog(ERROR, "Cannot continue backup because stream connect has failed."); } - if (PQgetisnull(res, 0, 0)) + /* + * Identify server, obtain server system identifier and timeline + */ + res = pgut_execute(stream_thread_arg->conn, "IDENTIFY_SYSTEM", 0, NULL); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) { - elog(VERBOSE, "cannot get file block for relation oid %u", - relOid); - return NULL; + elog(WARNING,"Could not send replication command \"%s\": %s", + "IDENTIFY_SYSTEM", PQerrorMessage(stream_thread_arg->conn)); + PQfinish(stream_thread_arg->conn); + elog(ERROR, "Cannot continue backup because stream connect has failed."); } - result = (char *) PQunescapeBytea((unsigned char *) PQgetvalue(res, 0, 0), - result_size); + stream_conn_sysidentifier_str = PQgetvalue(res, 0, 0); + stream_conn_tli = atoi(PQgetvalue(res, 0, 1)); - PQclear(res); + /* Additional sanity, primary for PG 9.5, + * where system id can be obtained only via "IDENTIFY SYSTEM" + */ + if (!parse_uint64(stream_conn_sysidentifier_str, &stream_conn_sysidentifier, 0)) + elog(ERROR, "%s is not system_identifier", stream_conn_sysidentifier_str); - pfree(params[0]); - pfree(params[1]); - pfree(params[2]); - pfree(params[3]); + if (stream_conn_sysidentifier != instance_config.system_identifier) + elog(ERROR, "System identifier mismatch. Connected PostgreSQL instance has system id: " + "" UINT64_FORMAT ". Expected: " UINT64_FORMAT ".", + stream_conn_sysidentifier, instance_config.system_identifier); - return result; + if (stream_conn_tli != current.tli) + elog(ERROR, "Timeline identifier mismatch. " + "Connected PostgreSQL instance has timeline id: %X. 
Expected: %X.", + stream_conn_tli, current.tli); + + PQclear(res); } diff --git a/src/catalog.c b/src/catalog.c index 51d791a70..e47f0367b 100644 --- a/src/catalog.c +++ b/src/catalog.c @@ -3,44 +3,139 @@ * catalog.c: backup catalog operation * * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ #include "pg_probackup.h" +#include "access/timeline.h" #include -#include #include -#include #include -#include -#include -#include #include +#include "utils/file.h" +#include "utils/configuration.h" + +static pgBackup* get_closest_backup(timelineInfo *tlinfo); +static pgBackup* get_oldest_backup(timelineInfo *tlinfo); static const char *backupModes[] = {"", "PAGE", "PTRACK", "DELTA", "FULL"}; static pgBackup *readBackupControlFile(const char *path); static bool exit_hook_registered = false; -static char lock_file[MAXPGPATH]; +static parray *lock_files = NULL; + +static timelineInfo * +timelineInfoNew(TimeLineID tli) +{ + timelineInfo *tlinfo = (timelineInfo *) pgut_malloc(sizeof(timelineInfo)); + MemSet(tlinfo, 0, sizeof(timelineInfo)); + tlinfo->tli = tli; + tlinfo->switchpoint = InvalidXLogRecPtr; + tlinfo->parent_link = NULL; + tlinfo->xlog_filelist = parray_new(); + tlinfo->anchor_lsn = InvalidXLogRecPtr; + tlinfo->anchor_tli = 0; + tlinfo->n_xlog_files = 0; + return tlinfo; +} + +/* free timelineInfo object */ +void +timelineInfoFree(void *tliInfo) +{ + timelineInfo *tli = (timelineInfo *) tliInfo; + + parray_walk(tli->xlog_filelist, pgFileFree); + parray_free(tli->xlog_filelist); + + if (tli->backups) + { + parray_walk(tli->backups, pgBackupFree); + parray_free(tli->backups); + } + + pfree(tliInfo); +} +/* Iterate over locked backups and delete locks files */ static void unlink_lock_atexit(void) { - int res; - res = unlink(lock_file); - if (res != 0 && res != ENOENT) - elog(WARNING, "%s: %s", lock_file, strerror(errno)); + int i; + + if (lock_files == NULL) + return; + + for (i = 0; i < parray_num(lock_files); i++) + { + char *lock_file = (char *) parray_get(lock_files, i); + int res; + + res = fio_unlink(lock_file, FIO_BACKUP_HOST); + if (res != 0 && errno != ENOENT) + elog(WARNING, "%s: %s", lock_file, strerror(errno)); + } + + parray_walk(lock_files, pfree); + parray_free(lock_files); + lock_files = NULL; +} + +/* + * Read backup meta information from BACKUP_CONTROL_FILE. + * If no backup matches, return NULL. + */ +pgBackup * +read_backup(const char *root_dir) +{ + char conf_path[MAXPGPATH]; + + join_path_components(conf_path, root_dir, BACKUP_CONTROL_FILE); + + return readBackupControlFile(conf_path); } /* - * Create a lockfile. + * Save the backup status into BACKUP_CONTROL_FILE. + * + * We need to reread the backup using its ID and save it changing only its + * status. */ void -catalog_lock(void) +write_backup_status(pgBackup *backup, BackupStatus status, + const char *instance_name, bool strict) +{ + pgBackup *tmp; + + tmp = read_backup(backup->root_dir); + if (!tmp) + { + /* + * Silently exit the function, since read_backup already logged the + * warning message. + */ + return; + } + + backup->status = status; + tmp->status = backup->status; + tmp->root_dir = pgut_strdup(backup->root_dir); + + write_backup(tmp, strict); + + pgBackupFree(tmp); +} + +/* + * Create exclusive lockfile in the backup's directory. 
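+ * Returns true when the lock is taken (or, in lax mode, when the only
+ * failure was "out of space"), false when another live process already
+ * holds the lock. A minimal usage sketch, mirroring how
+ * catalog_lock_backup_list() below consumes the result:
+ *
+ *     if (!lock_backup(backup, true))
+ *         elog(ERROR, "Cannot lock backup %s directory",
+ *              base36enc(backup->start_time));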
+ */ +bool +lock_backup(pgBackup *backup, bool strict) { + char lock_file[MAXPGPATH]; int fd; char buffer[MAXPGPATH * 2 + 256]; int ntries; @@ -49,7 +144,7 @@ catalog_lock(void) pid_t my_pid, my_p_pid; - join_path_components(lock_file, backup_instance_path, BACKUP_CATALOG_PID); + join_path_components(lock_file, backup->root_dir, BACKUP_CATALOG_PID); /* * If the PID in the lockfile is our own PID or our parent's or @@ -94,7 +189,7 @@ catalog_lock(void) * Think not to make the file protection weaker than 0600. See * comments below. */ - fd = open(lock_file, O_RDWR | O_CREAT | O_EXCL, 0600); + fd = fio_open(lock_file, O_RDWR | O_CREAT | O_EXCL, FIO_BACKUP_HOST); if (fd >= 0) break; /* Success; exit the retry loop */ @@ -102,34 +197,34 @@ catalog_lock(void) * Couldn't create the pid file. Probably it already exists. */ if ((errno != EEXIST && errno != EACCES) || ntries > 100) - elog(ERROR, "could not create lock file \"%s\": %s", + elog(ERROR, "Could not create lock file \"%s\": %s", lock_file, strerror(errno)); /* * Read the file to get the old owner's PID. Note race condition * here: file might have been deleted since we tried to create it. */ - fd = open(lock_file, O_RDONLY, 0600); + fd = fio_open(lock_file, O_RDONLY, FIO_BACKUP_HOST); if (fd < 0) { if (errno == ENOENT) continue; /* race condition; try again */ - elog(ERROR, "could not open lock file \"%s\": %s", + elog(ERROR, "Could not open lock file \"%s\": %s", lock_file, strerror(errno)); } - if ((len = read(fd, buffer, sizeof(buffer) - 1)) < 0) - elog(ERROR, "could not read lock file \"%s\": %s", + if ((len = fio_read(fd, buffer, sizeof(buffer) - 1)) < 0) + elog(ERROR, "Could not read lock file \"%s\": %s", lock_file, strerror(errno)); - close(fd); + fio_close(fd); if (len == 0) - elog(ERROR, "lock file \"%s\" is empty", lock_file); + elog(ERROR, "Lock file \"%s\" is empty", lock_file); buffer[len] = '\0'; encoded_pid = atoi(buffer); if (encoded_pid <= 0) - elog(ERROR, "bogus data in lock file \"%s\": \"%s\"", + elog(ERROR, "Bogus data in lock file \"%s\": \"%s\"", lock_file, buffer); /* @@ -143,9 +238,21 @@ catalog_lock(void) */ if (encoded_pid != my_pid && encoded_pid != my_p_pid) { - if (kill(encoded_pid, 0) == 0 || - (errno != ESRCH && errno != EPERM)) - elog(ERROR, "lock file \"%s\" already exists", lock_file); + if (kill(encoded_pid, 0) == 0) + { + elog(WARNING, "Process %d is using backup %s and still is running", + encoded_pid, base36enc(backup->start_time)); + return false; + } + else + { + if (errno == ESRCH) + elog(WARNING, "Process %d which used backup %s no longer exists", + encoded_pid, base36enc(backup->start_time)); + else + elog(ERROR, "Failed to send signal 0 to a process %d: %s", + encoded_pid, strerror(errno)); + } } /* @@ -153,8 +260,8 @@ catalog_lock(void) * it. Need a loop because of possible race condition against other * would-be creators. */ - if (unlink(lock_file) < 0) - elog(ERROR, "could not remove old lock file \"%s\": %s", + if (fio_unlink(lock_file, FIO_BACKUP_HOST) < 0) + elog(ERROR, "Could not remove old lock file \"%s\": %s", lock_file, strerror(errno)); } @@ -164,34 +271,42 @@ catalog_lock(void) snprintf(buffer, sizeof(buffer), "%d\n", my_pid); errno = 0; - if (write(fd, buffer, strlen(buffer)) != strlen(buffer)) + if (fio_write(fd, buffer, strlen(buffer)) != strlen(buffer)) { int save_errno = errno; - close(fd); - unlink(lock_file); + fio_close(fd); + fio_unlink(lock_file, FIO_BACKUP_HOST); /* if write didn't set errno, assume problem is no disk space */ errno = save_errno ? 
save_errno : ENOSPC; - elog(ERROR, "could not write lock file \"%s\": %s", + + /* In lax mode if we failed to grab lock because of 'out of space error', + * then treat backup as locked. + * Only delete command should be run in lax mode. + */ + if (!strict && errno == ENOSPC) + return true; + + elog(ERROR, "Could not write lock file \"%s\": %s", lock_file, strerror(errno)); } - if (fsync(fd) != 0) + if (fio_flush(fd) != 0) { int save_errno = errno; - close(fd); - unlink(lock_file); + fio_close(fd); + fio_unlink(lock_file, FIO_BACKUP_HOST); errno = save_errno; - elog(ERROR, "could not write lock file \"%s\": %s", + elog(ERROR, "Could not write lock file \"%s\": %s", lock_file, strerror(errno)); } - if (close(fd) != 0) + if (fio_close(fd) != 0) { int save_errno = errno; - unlink(lock_file); + fio_unlink(lock_file, FIO_BACKUP_HOST); errno = save_errno; - elog(ERROR, "could not write lock file \"%s\": %s", + elog(ERROR, "Could not write lock file \"%s\": %s", lock_file, strerror(errno)); } @@ -203,22 +318,13 @@ catalog_lock(void) atexit(unlink_lock_atexit); exit_hook_registered = true; } -} -/* - * Read backup meta information from BACKUP_CONTROL_FILE. - * If no backup matches, return NULL. - */ -pgBackup * -read_backup(time_t timestamp) -{ - pgBackup tmp; - char conf_path[MAXPGPATH]; + /* Use parray so that the lock files are unlinked in a loop */ + if (lock_files == NULL) + lock_files = parray_new(); + parray_append(lock_files, pgut_strdup(lock_file)); - tmp.start_time = timestamp; - pgBackupGetPath(&tmp, conf_path, lengthof(conf_path), BACKUP_CONTROL_FILE); - - return readBackupControlFile(conf_path); + return true; } /* @@ -231,14 +337,77 @@ pgBackupGetBackupMode(pgBackup *backup) } static bool -IsDir(const char *dirpath, const char *entry) +IsDir(const char *dirpath, const char *entry, fio_location location) { char path[MAXPGPATH]; struct stat st; snprintf(path, MAXPGPATH, "%s/%s", dirpath, entry); - return stat(path, &st) == 0 && S_ISDIR(st.st_mode); + return fio_stat(path, &st, false, location) == 0 && S_ISDIR(st.st_mode); +} + +/* + * Create list of instances in given backup catalog. + * + * Returns parray of "InstanceConfig" structures, filled with + * actual config of each instance. + */ +parray * +catalog_get_instance_list(void) +{ + char path[MAXPGPATH]; + DIR *dir; + struct dirent *dent; + parray *instances; + + instances = parray_new(); + + /* open directory and list contents */ + join_path_components(path, backup_path, BACKUPS_DIR); + dir = opendir(path); + if (dir == NULL) + elog(ERROR, "Cannot open directory \"%s\": %s", + path, strerror(errno)); + + while (errno = 0, (dent = readdir(dir)) != NULL) + { + char child[MAXPGPATH]; + struct stat st; + InstanceConfig *instance; + + /* skip entries point current dir or parent dir */ + if (strcmp(dent->d_name, ".") == 0 || + strcmp(dent->d_name, "..") == 0) + continue; + + join_path_components(child, path, dent->d_name); + + if (lstat(child, &st) == -1) + elog(ERROR, "Cannot stat file \"%s\": %s", + child, strerror(errno)); + + if (!S_ISDIR(st.st_mode)) + continue; + + instance = readInstanceConfigFile(dent->d_name); + + parray_append(instances, instance); + } + + /* TODO 3.0: switch to ERROR */ + if (parray_num(instances) == 0) + elog(WARNING, "This backup catalog contains no backup instances. 
Backup instance can be added via 'add-instance' command."); + + if (errno) + elog(ERROR, "Cannot read directory \"%s\": %s", + path, strerror(errno)); + + if (closedir(dir)) + elog(ERROR, "Cannot close directory \"%s\": %s", + path, strerror(errno)); + + return instances; } /* @@ -248,16 +417,19 @@ IsDir(const char *dirpath, const char *entry) * If valid backup id is passed only matching backup will be added to the list. */ parray * -catalog_get_backup_list(time_t requested_backup_id) +catalog_get_backup_list(const char *instance_name, time_t requested_backup_id) { - DIR *data_dir = NULL; - struct dirent *data_ent = NULL; - parray *backups = NULL; - pgBackup *backup = NULL; - int i; + DIR *data_dir = NULL; + struct dirent *data_ent = NULL; + parray *backups = NULL; + int i; + char backup_instance_path[MAXPGPATH]; + + sprintf(backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); /* open backup instance backups directory */ - data_dir = opendir(backup_instance_path); + data_dir = fio_opendir(backup_instance_path, FIO_BACKUP_HOST); if (data_dir == NULL) { elog(WARNING, "cannot open directory \"%s\": %s", backup_instance_path, @@ -267,13 +439,14 @@ catalog_get_backup_list(time_t requested_backup_id) /* scan the directory and list backups */ backups = parray_new(); - for (; (data_ent = readdir(data_dir)) != NULL; errno = 0) + for (; (data_ent = fio_readdir(data_dir)) != NULL; errno = 0) { - char backup_conf_path[MAXPGPATH]; - char data_path[MAXPGPATH]; + char backup_conf_path[MAXPGPATH]; + char data_path[MAXPGPATH]; + pgBackup *backup = NULL; /* skip not-directory entries and hidden entries */ - if (!IsDir(backup_instance_path, data_ent->d_name) + if (!IsDir(backup_instance_path, data_ent->d_name, FIO_BACKUP_HOST) || data_ent->d_name[0] == '.') continue; @@ -283,124 +456,1223 @@ catalog_get_backup_list(time_t requested_backup_id) /* read backup information from BACKUP_CONTROL_FILE */ snprintf(backup_conf_path, MAXPGPATH, "%s/%s", data_path, BACKUP_CONTROL_FILE); backup = readBackupControlFile(backup_conf_path); + + if (!backup) + { + backup = pgut_new(pgBackup); + pgBackupInit(backup); + backup->start_time = base36dec(data_ent->d_name); + } + else if (strcmp(base36enc(backup->start_time), data_ent->d_name) != 0) + { + elog(WARNING, "backup ID in control file \"%s\" doesn't match name of the backup folder \"%s\"", + base36enc(backup->start_time), backup_conf_path); + } + + backup->root_dir = pgut_strdup(data_path); + + backup->database_dir = pgut_malloc(MAXPGPATH); + join_path_components(backup->database_dir, backup->root_dir, DATABASE_DIR); + + /* Initialize page header map */ + init_header_map(backup); + + /* TODO: save encoded backup id */ backup->backup_id = backup->start_time; + if (requested_backup_id != INVALID_BACKUP_ID + && requested_backup_id != backup->start_time) + { + pgBackupFree(backup); + continue; + } + parray_append(backups, backup); + + if (errno && errno != ENOENT) + { + elog(WARNING, "cannot read data directory \"%s\": %s", + data_ent->d_name, strerror(errno)); + goto err_proc; + } + } + if (errno) + { + elog(WARNING, "cannot read backup root directory \"%s\": %s", + backup_instance_path, strerror(errno)); + goto err_proc; + } + + fio_closedir(data_dir); + data_dir = NULL; + + parray_qsort(backups, pgBackupCompareIdDesc); + + /* Link incremental backups with their ancestors.*/ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *curr = parray_get(backups, i); + pgBackup **ancestor; + pgBackup key; + + if (curr->backup_mode == BACKUP_MODE_FULL) 
+ continue; + + key.start_time = curr->parent_backup; + ancestor = (pgBackup **) parray_bsearch(backups, &key, + pgBackupCompareIdDesc); + if (ancestor) + curr->parent_backup_link = *ancestor; + } + + return backups; + +err_proc: + if (data_dir) + fio_closedir(data_dir); + if (backups) + parray_walk(backups, pgBackupFree); + parray_free(backups); + + elog(ERROR, "Failed to get backup list"); + + return NULL; +} + +/* + * Create list of backup datafiles. + * If 'requested_backup_id' is INVALID_BACKUP_ID, exit with error. + * If valid backup id is passed only matching backup will be added to the list. + * TODO this function only used once. Is it really needed? + */ +parray * +get_backup_filelist(pgBackup *backup, bool strict) +{ + parray *files = NULL; + char backup_filelist_path[MAXPGPATH]; + + join_path_components(backup_filelist_path, backup->root_dir, DATABASE_FILE_LIST); + files = dir_read_file_list(NULL, NULL, backup_filelist_path, FIO_BACKUP_HOST, backup->content_crc); + + /* redundant sanity? */ + if (!files) + elog(strict ? ERROR : WARNING, "Failed to get file list for backup %s", base36enc(backup->start_time)); + + return files; +} - /* ignore corrupted backups */ - if (backup) +/* + * Lock list of backups. Function goes in backward direction. + */ +void +catalog_lock_backup_list(parray *backup_list, int from_idx, int to_idx, bool strict) +{ + int start_idx, + end_idx; + int i; + + if (parray_num(backup_list) == 0) + return; + + start_idx = Max(from_idx, to_idx); + end_idx = Min(from_idx, to_idx); + + for (i = start_idx; i >= end_idx; i--) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + if (!lock_backup(backup, strict)) + elog(ERROR, "Cannot lock backup %s directory", + base36enc(backup->start_time)); + } +} + +/* + * Find the latest valid child of latest valid FULL backup on given timeline + */ +pgBackup * +catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current_start_time) +{ + int i; + pgBackup *full_backup = NULL; + pgBackup *tmp_backup = NULL; + char *invalid_backup_id; + + /* backup_list is sorted in order of descending ID */ + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + if ((backup->backup_mode == BACKUP_MODE_FULL && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) && backup->tli == tli) + { + full_backup = backup; + break; + } + } + + /* Failed to find valid FULL backup to fulfill ancestor role */ + if (!full_backup) + return NULL; + + elog(LOG, "Latest valid FULL backup: %s", + base36enc(full_backup->start_time)); + + /* FULL backup is found, lets find his latest child */ + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* only valid descendants are acceptable for evaluation */ + if ((backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) + { + switch (scan_parent_chain(backup, &tmp_backup)) + { + /* broken chain */ + case ChainIsBroken: + invalid_backup_id = base36enc_dup(tmp_backup->parent_backup); + + elog(WARNING, "Backup %s has missing parent: %s. Cannot be a parent", + base36enc(backup->start_time), invalid_backup_id); + pg_free(invalid_backup_id); + continue; + + /* chain is intact, but at least one parent is invalid */ + case ChainIsInvalid: + invalid_backup_id = base36enc_dup(tmp_backup->start_time); + + elog(WARNING, "Backup %s has invalid parent: %s. 
Cannot be a parent", + base36enc(backup->start_time), invalid_backup_id); + pg_free(invalid_backup_id); + continue; + + /* chain is ok */ + case ChainIsOk: + /* Yes, we could call is_parent() earlier - after choosing the ancestor, + * but this way we have an opportunity to detect and report all possible + * anomalies. + */ + if (is_parent(full_backup->start_time, backup, true)) + return backup; + } + } + /* skip yourself */ + else if (backup->start_time == current_start_time) + continue; + else + { + elog(WARNING, "Backup %s has status: %s. Cannot be a parent.", + base36enc(backup->start_time), status2str(backup->status)); + } + } + + return NULL; +} + +/* + * For multi-timeline chain, look up suitable parent for incremental backup. + * Multi-timeline chain has full backup and one or more descendants located + * on different timelines. + */ +pgBackup * +get_multi_timeline_parent(parray *backup_list, parray *tli_list, + TimeLineID current_tli, time_t current_start_time, + InstanceConfig *instance) +{ + int i; + timelineInfo *my_tlinfo = NULL; + timelineInfo *tmp_tlinfo = NULL; + pgBackup *ancestor_backup = NULL; + + /* there are no timelines in the archive */ + if (parray_num(tli_list) == 0) + return NULL; + + /* look for current timelineInfo */ + for (i = 0; i < parray_num(tli_list); i++) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i); + + if (tlinfo->tli == current_tli) + { + my_tlinfo = tlinfo; + break; + } + } + + if (my_tlinfo == NULL) + return NULL; + + /* Locate tlinfo of suitable full backup. + * Consider this example: + * t3 s2-------X <-! We are here + * / + * t2 s1----D---*----E---> + * / + * t1--A--B--*---C-------> + * + * A, E - full backups + * B, C, D - incremental backups + * + * We must find A. + */ + tmp_tlinfo = my_tlinfo; + while (tmp_tlinfo->parent_link) + { + /* if timeline has backups, iterate over them */ + if (tmp_tlinfo->parent_link->backups) + { + for (i = 0; i < parray_num(tmp_tlinfo->parent_link->backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(tmp_tlinfo->parent_link->backups, i); + + if (backup->backup_mode == BACKUP_MODE_FULL && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE) && + backup->stop_lsn <= tmp_tlinfo->switchpoint) + { + ancestor_backup = backup; + break; + } + } + } + + if (ancestor_backup) + break; + + tmp_tlinfo = tmp_tlinfo->parent_link; + } + + /* failed to find valid FULL backup on parent timelines */ + if (!ancestor_backup) + return NULL; + else + elog(LOG, "Latest valid full backup: %s, tli: %i", + base36enc(ancestor_backup->start_time), ancestor_backup->tli); + + /* At this point we found suitable full backup, + * now we must find his latest child, suitable to be + * parent of current incremental backup. + * Consider this example: + * t3 s2-------X <-! We are here + * / + * t2 s1----D---*----E---> + * / + * t1--A--B--*---C-------> + * + * A, E - full backups + * B, C, D - incremental backups + * + * We found A, now we must find D. 
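+ * The lookup below is two-staged: first the current timeline is scanned
+ * for a valid child of A, then parent timelines are walked via parent_link,
+ * considering only backups whose stop_lsn does not lie beyond the
+ * switchpoint into the child timeline.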
+ */ + + /* Optimistically, look on current timeline for valid incremental backup, child of ancestor */ + if (my_tlinfo->backups) + { + /* backups are sorted in descending order and we need latest valid */ + for (i = 0; i < parray_num(my_tlinfo->backups); i++) + { + pgBackup *tmp_backup = NULL; + pgBackup *backup = (pgBackup *) parray_get(my_tlinfo->backups, i); + + /* found suitable parent */ + if (scan_parent_chain(backup, &tmp_backup) == ChainIsOk && + is_parent(ancestor_backup->start_time, backup, false)) + return backup; + } + } + + /* Iterate over parent timelines and look for a valid backup, child of ancestor */ + tmp_tlinfo = my_tlinfo; + while (tmp_tlinfo->parent_link) + { + + /* if timeline has backups, iterate over them */ + if (tmp_tlinfo->parent_link->backups) + { + for (i = 0; i < parray_num(tmp_tlinfo->parent_link->backups); i++) + { + pgBackup *tmp_backup = NULL; + pgBackup *backup = (pgBackup *) parray_get(tmp_tlinfo->parent_link->backups, i); + + /* We are not interested in backups + * located outside of our timeline history + */ + if (backup->stop_lsn > tmp_tlinfo->switchpoint) + continue; + + if (scan_parent_chain(backup, &tmp_backup) == ChainIsOk && + is_parent(ancestor_backup->start_time, backup, true)) + return backup; + } + } + + tmp_tlinfo = tmp_tlinfo->parent_link; + } + + return NULL; +} + +/* create backup directory in $BACKUP_PATH */ +int +pgBackupCreateDir(pgBackup *backup) +{ + int i; + char path[MAXPGPATH]; + parray *subdirs = parray_new(); + + parray_append(subdirs, pg_strdup(DATABASE_DIR)); + + /* Add external dirs containers */ + if (backup->external_dir_str) + { + parray *external_list; + + external_list = make_external_directory_list(backup->external_dir_str, + false); + for (i = 0; i < parray_num(external_list); i++) + { + char temp[MAXPGPATH]; + /* Numeration of externaldirs starts with 1 */ + makeExternalDirPathByNum(temp, EXTERNAL_DIR, i+1); + parray_append(subdirs, pg_strdup(temp)); + } + free_dir_list(external_list); + } + + pgBackupGetPath(backup, path, lengthof(path), NULL); + + if (!dir_is_empty(path, FIO_BACKUP_HOST)) + elog(ERROR, "backup destination is not empty \"%s\"", path); + + fio_mkdir(path, DIR_PERMISSION, FIO_BACKUP_HOST); + backup->root_dir = pgut_strdup(path); + + backup->database_dir = pgut_malloc(MAXPGPATH); + join_path_components(backup->database_dir, backup->root_dir, DATABASE_DIR); + + /* block header map */ + init_header_map(backup); + + /* create directories for actual backup files */ + for (i = 0; i < parray_num(subdirs); i++) + { + join_path_components(path, backup->root_dir, parray_get(subdirs, i)); + fio_mkdir(path, DIR_PERMISSION, FIO_BACKUP_HOST); + } + + free_dir_list(subdirs); + return 0; +} + +/* + * Create list of timelines. + * TODO: '.partial' and '.part' segno information should be added to tlinfo. 
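+ * Returns a parray of timelineInfo structures, one per timeline found in
+ * the instance WAL archive, with backups attached, lost segments recorded
+ * and, when wal-depth is configured, the 'keep' flag computed for every
+ * WAL file. An illustrative (hypothetical) consumer that reports the
+ * protected segments could look like this:
+ *
+ *     parray *tli_list = catalog_get_timelines(instance);
+ *
+ *     for (i = 0; i < parray_num(tli_list); i++)
+ *     {
+ *         timelineInfo *tlinfo = parray_get(tli_list, i);
+ *
+ *         for (j = 0; j < parray_num(tlinfo->xlog_filelist); j++)
+ *         {
+ *             xlogFile *wal_file = parray_get(tlinfo->xlog_filelist, j);
+ *
+ *             if (wal_file->keep)
+ *                 elog(LOG, "Keeping WAL file \"%s\" on timeline %i",
+ *                      wal_file->file.name, tlinfo->tli);
+ *         }
+ *     }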
+ */ +parray * +catalog_get_timelines(InstanceConfig *instance) +{ + int i,j,k; + parray *xlog_files_list = parray_new(); + parray *timelineinfos; + parray *backups; + timelineInfo *tlinfo; + char arclog_path[MAXPGPATH]; + + /* for fancy reporting */ + char begin_segno_str[MAXFNAMELEN]; + char end_segno_str[MAXFNAMELEN]; + + /* read all xlog files that belong to this archive */ + sprintf(arclog_path, "%s/%s/%s", backup_path, "wal", instance->name); + dir_list_file(xlog_files_list, arclog_path, false, false, false, false, true, 0, FIO_BACKUP_HOST); + parray_qsort(xlog_files_list, pgFileCompareName); + + timelineinfos = parray_new(); + tlinfo = NULL; + + /* walk through files and collect info about timelines */ + for (i = 0; i < parray_num(xlog_files_list); i++) + { + pgFile *file = (pgFile *) parray_get(xlog_files_list, i); + TimeLineID tli; + parray *timelines; + xlogFile *wal_file = NULL; + + /* + * Regular WAL file. + * IsXLogFileName() cannot be used here + */ + if (strspn(file->name, "0123456789ABCDEF") == XLOG_FNAME_LEN) + { + int result = 0; + uint32 log, seg; + XLogSegNo segno = 0; + char suffix[MAXFNAMELEN]; + + result = sscanf(file->name, "%08X%08X%08X.%s", + &tli, &log, &seg, (char *) &suffix); + + /* sanity */ + if (result < 3) + { + elog(WARNING, "unexpected WAL file name \"%s\"", file->name); + continue; + } + + /* get segno from log */ + GetXLogSegNoFromScrath(segno, log, seg, instance->xlog_seg_size); + + /* regular WAL file with suffix */ + if (result == 4) + { + /* backup history file. Currently we don't use them */ + if (IsBackupHistoryFileName(file->name)) + { + elog(VERBOSE, "backup history file \"%s\"", file->name); + + if (!tlinfo || tlinfo->tli != tli) + { + tlinfo = timelineInfoNew(tli); + parray_append(timelineinfos, tlinfo); + } + + /* append file to xlog file list */ + wal_file = palloc(sizeof(xlogFile)); + wal_file->file = *file; + wal_file->segno = segno; + wal_file->type = BACKUP_HISTORY_FILE; + wal_file->keep = false; + parray_append(tlinfo->xlog_filelist, wal_file); + continue; + } + /* partial WAL segment */ + else if (IsPartialXLogFileName(file->name) || + IsPartialCompressXLogFileName(file->name)) + { + elog(VERBOSE, "partial WAL file \"%s\"", file->name); + + if (!tlinfo || tlinfo->tli != tli) + { + tlinfo = timelineInfoNew(tli); + parray_append(timelineinfos, tlinfo); + } + + /* append file to xlog file list */ + wal_file = palloc(sizeof(xlogFile)); + wal_file->file = *file; + wal_file->segno = segno; + wal_file->type = PARTIAL_SEGMENT; + wal_file->keep = false; + parray_append(tlinfo->xlog_filelist, wal_file); + continue; + } + /* temp WAL segment */ + else if (IsTempXLogFileName(file->name) || + IsTempCompressXLogFileName(file->name)) + { + elog(VERBOSE, "temp WAL file \"%s\"", file->name); + + if (!tlinfo || tlinfo->tli != tli) + { + tlinfo = timelineInfoNew(tli); + parray_append(timelineinfos, tlinfo); + } + + /* append file to xlog file list */ + wal_file = palloc(sizeof(xlogFile)); + wal_file->file = *file; + wal_file->segno = segno; + wal_file->type = TEMP_SEGMENT; + wal_file->keep = false; + parray_append(tlinfo->xlog_filelist, wal_file); + continue; + } + /* we only expect compressed wal files with .gz suffix */ + else if (strcmp(suffix, "gz") != 0) + { + elog(WARNING, "unexpected WAL file name \"%s\"", file->name); + continue; + } + } + + /* new file belongs to new timeline */ + if (!tlinfo || tlinfo->tli != tli) + { + tlinfo = timelineInfoNew(tli); + parray_append(timelineinfos, tlinfo); + } + /* + * As it is impossible to detect if segments 
before segno are lost, + * or just do not exist, do not report them as lost. + */ + else if (tlinfo->n_xlog_files != 0) + { + /* check, if segments are consequent */ + XLogSegNo expected_segno = tlinfo->end_segno + 1; + + /* + * Some segments are missing. remember them in lost_segments to report. + * Normally we expect that segment numbers form an increasing sequence, + * though it's legal to find two files with equal segno in case there + * are both compressed and non-compessed versions. For example + * 000000010000000000000002 and 000000010000000000000002.gz + * + */ + if (segno != expected_segno && segno != tlinfo->end_segno) + { + xlogInterval *interval = palloc(sizeof(xlogInterval));; + interval->begin_segno = expected_segno; + interval->end_segno = segno - 1; + + if (tlinfo->lost_segments == NULL) + tlinfo->lost_segments = parray_new(); + + parray_append(tlinfo->lost_segments, interval); + } + } + + if (tlinfo->begin_segno == 0) + tlinfo->begin_segno = segno; + + /* this file is the last for this timeline so far */ + tlinfo->end_segno = segno; + /* update counters */ + tlinfo->n_xlog_files++; + tlinfo->size += file->size; + + /* append file to xlog file list */ + wal_file = palloc(sizeof(xlogFile)); + wal_file->file = *file; + wal_file->segno = segno; + wal_file->type = SEGMENT; + wal_file->keep = false; + parray_append(tlinfo->xlog_filelist, wal_file); + } + /* timeline history file */ + else if (IsTLHistoryFileName(file->name)) + { + TimeLineHistoryEntry *tln; + + sscanf(file->name, "%08X.history", &tli); + timelines = read_timeline_history(arclog_path, tli, true); + + if (!tlinfo || tlinfo->tli != tli) + { + tlinfo = timelineInfoNew(tli); + parray_append(timelineinfos, tlinfo); + /* + * 1 is the latest timeline in the timelines list. + * 0 - is our timeline, which is of no interest here + */ + tln = (TimeLineHistoryEntry *) parray_get(timelines, 1); + tlinfo->switchpoint = tln->end; + tlinfo->parent_tli = tln->tli; + + /* find parent timeline to link it with this one */ + for (j = 0; j < parray_num(timelineinfos); j++) + { + timelineInfo *cur = (timelineInfo *) parray_get(timelineinfos, j); + if (cur->tli == tlinfo->parent_tli) + { + tlinfo->parent_link = cur; + break; + } + } + } + + parray_walk(timelines, pfree); + parray_free(timelines); + } + else + elog(WARNING, "unexpected WAL file name \"%s\"", file->name); + } + + /* save information about backups belonging to each timeline */ + backups = catalog_get_backup_list(instance->name, INVALID_BACKUP_ID); + + for (i = 0; i < parray_num(timelineinfos); i++) + { + timelineInfo *tlinfo = parray_get(timelineinfos, i); + for (j = 0; j < parray_num(backups); j++) + { + pgBackup *backup = parray_get(backups, j); + if (tlinfo->tli == backup->tli) + { + if (tlinfo->backups == NULL) + tlinfo->backups = parray_new(); + + parray_append(tlinfo->backups, backup); + } + } + } + + /* determine oldest backup and closest backup for every timeline */ + for (i = 0; i < parray_num(timelineinfos); i++) + { + timelineInfo *tlinfo = parray_get(timelineinfos, i); + + tlinfo->oldest_backup = get_oldest_backup(tlinfo); + tlinfo->closest_backup = get_closest_backup(tlinfo); + } + + /* determine which WAL segments must be kept because of wal retention */ + if (instance->wal_depth <= 0) + return timelineinfos; + + /* + * WAL retention for now is fairly simple. + * User can set only one parameter - 'wal-depth'. + * It determines how many latest valid(!) 
backups on timeline + * must have an ability to perform PITR: + * Consider the example: + * + * ---B1-------B2-------B3-------B4--------> WAL timeline1 + * + * If 'wal-depth' is set to 2, then WAL purge should produce the following result: + * + * B1 B2 B3-------B4--------> WAL timeline1 + * + * Only valid backup can satisfy 'wal-depth' condition, so if B3 is not OK or DONE, + * then WAL purge should produce the following result: + * B1 B2-------B3-------B4--------> WAL timeline1 + * + * Complicated cases, such as branched timelines are taken into account. + * wal-depth is applied to each timeline independently: + * + * |---------> WAL timeline2 + * ---B1---|---B2-------B3-------B4--------> WAL timeline1 + * + * after WAL purge with wal-depth=2: + * + * |---------> WAL timeline2 + * B1---| B2 B3-------B4--------> WAL timeline1 + * + * In this example WAL retention prevents purge of WAL required by tli2 + * to stay reachable from backup B on tli1. + * + * To protect WAL from purge we try to set 'anchor_lsn' and 'anchor_tli' in every timeline. + * They are usually comes from 'start-lsn' and 'tli' attributes of backup + * calculated by 'wal-depth' parameter. + * With 'wal-depth=2' anchor_backup in tli1 is B3. + + * If timeline has not enough valid backups to satisfy 'wal-depth' condition, + * then 'anchor_lsn' and 'anchor_tli' taken from from 'start-lsn' and 'tli + * attribute of closest_backup. + * The interval of WAL starting from closest_backup to switchpoint is + * saved into 'keep_segments' attribute. + * If there is several intermediate timelines between timeline and its closest_backup + * then on every intermediate timeline WAL interval between switchpoint + * and starting segment is placed in 'keep_segments' attributes: + * + * |---------> WAL timeline3 + * |------| B5-----B6--> WAL timeline2 + * B1---| B2 B3-------B4------------> WAL timeline1 + * + * On timeline where closest_backup is located the WAL interval between + * closest_backup and switchpoint is placed into 'keep_segments'. + * If timeline has no 'closest_backup', then 'wal-depth' rules cannot be applied + * to this timeline and its WAL must be purged by following the basic rules of WAL purging. + * + * Third part is handling of ARCHIVE backups. + * If B1 and B2 have ARCHIVE wal-mode, then we must preserve WAL intervals + * between start_lsn and stop_lsn for each of them in 'keep_segments'. + */ + + /* determine anchor_lsn and keep_segments for every timeline */ + for (i = 0; i < parray_num(timelineinfos); i++) + { + int count = 0; + timelineInfo *tlinfo = parray_get(timelineinfos, i); + + /* + * Iterate backward on backups belonging to this timeline to find + * anchor_backup. NOTE Here we rely on the fact that backups list + * is ordered by start_lsn DESC. + */ + if (tlinfo->backups) + { + for (j = 0; j < parray_num(tlinfo->backups); j++) + { + pgBackup *backup = parray_get(tlinfo->backups, j); + + /* sanity */ + if (XLogRecPtrIsInvalid(backup->start_lsn) || + backup->tli <= 0) + continue; + + /* skip invalid backups */ + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE) + continue; + + /* + * Pinned backups should be ignored for the + * purpose of retention fulfillment, so skip them. 
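+ * A backup counts as pinned here when its expire_time is set and still
+ * lies in the future (see pin_backup() below).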
+ */ + if (backup->expire_time > 0 && + backup->expire_time > current_time) + { + elog(LOG, "Pinned backup %s is ignored for the " + "purpose of WAL retention", + base36enc(backup->start_time)); + continue; + } + + count++; + + if (count == instance->wal_depth) + { + elog(LOG, "On timeline %i WAL is protected from purge at %X/%X", + tlinfo->tli, + (uint32) (backup->start_lsn >> 32), + (uint32) (backup->start_lsn)); + + tlinfo->anchor_lsn = backup->start_lsn; + tlinfo->anchor_tli = backup->tli; + break; + } + } + } + + /* + * Failed to find anchor backup for this timeline. + * We cannot just thrown it to the wolves, because by + * doing that we will violate our own guarantees. + * So check the existence of closest_backup for + * this timeline. If there is one, then + * set the 'anchor_lsn' and 'anchor_tli' to closest_backup + * 'start-lsn' and 'tli' respectively. + * |-------------B5----------> WAL timeline3 + * |-----|-------------------------> WAL timeline2 + * B1 B2---| B3 B4-------B6-----> WAL timeline1 + * + * wal-depth=2 + * + * If number of valid backups on timelines is less than 'wal-depth' + * then timeline must(!) stay reachable via parent timelines if any. + * If closest_backup is not available, then general WAL purge rules + * are applied. + */ + if (XLogRecPtrIsInvalid(tlinfo->anchor_lsn)) + { + /* + * Failed to find anchor_lsn in our own timeline. + * Consider the case: + * -------------------------------------> tli5 + * ----------------------------B4-------> tli4 + * S3`--------------> tli3 + * S1`------------S3---B3-------B6-> tli2 + * B1---S1-------------B2--------B5-----> tli1 + * + * B* - backups + * S* - switchpoints + * wal-depth=2 + * + * Expected result: + * TLI5 will be purged entirely + * B4-------> tli4 + * S2`--------------> tli3 + * S1`------------S2 B3-------B6-> tli2 + * B1---S1 B2--------B5-----> tli1 + */ + pgBackup *closest_backup = NULL; + xlogInterval *interval = NULL; + TimeLineID tli = 0; + /* check if tli has closest_backup */ + if (!tlinfo->closest_backup) + /* timeline has no closest_backup, wal retention cannot be + * applied to this timeline. + * Timeline will be purged up to oldest_backup if any or + * purge entirely if there is none. + * In example above: tli5 and tli4. + */ + continue; + + /* sanity for closest_backup */ + if (XLogRecPtrIsInvalid(tlinfo->closest_backup->start_lsn) || + tlinfo->closest_backup->tli <= 0) + continue; + + /* + * Set anchor_lsn and anchor_tli to protect whole timeline from purge + * In the example above: tli3. + */ + tlinfo->anchor_lsn = tlinfo->closest_backup->start_lsn; + tlinfo->anchor_tli = tlinfo->closest_backup->tli; + + /* closest backup may be located not in parent timeline */ + closest_backup = tlinfo->closest_backup; + + tli = tlinfo->tli; + + /* + * Iterate over parent timeline chain and + * look for timeline where closest_backup belong + */ + while (tlinfo->parent_link) + { + /* In case of intermediate timeline save to keep_segments + * begin_segno and switchpoint segment. + * In case of final timelines save to keep_segments + * closest_backup start_lsn segment and switchpoint segment. 
+ */ + XLogRecPtr switchpoint = tlinfo->switchpoint; + + tlinfo = tlinfo->parent_link; + + if (tlinfo->keep_segments == NULL) + tlinfo->keep_segments = parray_new(); + + /* in any case, switchpoint segment must be added to interval */ + interval = palloc(sizeof(xlogInterval)); + GetXLogSegNo(switchpoint, interval->end_segno, instance->xlog_seg_size); + + /* Save [S1`, S2] to keep_segments */ + if (tlinfo->tli != closest_backup->tli) + interval->begin_segno = tlinfo->begin_segno; + /* Save [B1, S1] to keep_segments */ + else + GetXLogSegNo(closest_backup->start_lsn, interval->begin_segno, instance->xlog_seg_size); + + /* + * TODO: check, maybe this interval is already here or + * covered by other larger interval. + */ + + GetXLogFileName(begin_segno_str, tlinfo->tli, interval->begin_segno, instance->xlog_seg_size); + GetXLogFileName(end_segno_str, tlinfo->tli, interval->end_segno, instance->xlog_seg_size); + + elog(LOG, "Timeline %i to stay reachable from timeline %i " + "protect from purge WAL interval between " + "%s and %s on timeline %i", + tli, closest_backup->tli, begin_segno_str, + end_segno_str, tlinfo->tli); + + parray_append(tlinfo->keep_segments, interval); + continue; + } + continue; + } + + /* Iterate over backups left */ + for (j = count; j < parray_num(tlinfo->backups); j++) + { + XLogSegNo segno = 0; + xlogInterval *interval = NULL; + pgBackup *backup = parray_get(tlinfo->backups, j); + + /* + * We must calculate keep_segments intervals for ARCHIVE backups + * with start_lsn less than anchor_lsn. + */ + + /* STREAM backups cannot contribute to keep_segments */ + if (backup->stream) + continue; + + /* sanity */ + if (XLogRecPtrIsInvalid(backup->start_lsn) || + backup->tli <= 0) + continue; + + /* no point in clogging keep_segments by backups protected by anchor_lsn */ + if (backup->start_lsn >= tlinfo->anchor_lsn) + continue; + + /* append interval to keep_segments */ + interval = palloc(sizeof(xlogInterval)); + GetXLogSegNo(backup->start_lsn, segno, instance->xlog_seg_size); + interval->begin_segno = segno; + GetXLogSegNo(backup->stop_lsn, segno, instance->xlog_seg_size); + + /* + * On replica it is possible to get STOP_LSN pointing to contrecord, + * so set end_segno to the next segment after STOP_LSN just to be safe. + */ + if (backup->from_replica) + interval->end_segno = segno + 1; + else + interval->end_segno = segno; + + GetXLogFileName(begin_segno_str, tlinfo->tli, interval->begin_segno, instance->xlog_seg_size); + GetXLogFileName(end_segno_str, tlinfo->tli, interval->end_segno, instance->xlog_seg_size); + + elog(LOG, "Archive backup %s to stay consistent " + "protect from purge WAL interval " + "between %s and %s on timeline %i", + base36enc(backup->start_time), + begin_segno_str, end_segno_str, backup->tli); + + if (tlinfo->keep_segments == NULL) + tlinfo->keep_segments = parray_new(); + + parray_append(tlinfo->keep_segments, interval); + } + } + + /* + * Protect WAL segments from deletion by setting 'keep' flag. + * We must keep all WAL segments after anchor_lsn (including), and also segments + * required by ARCHIVE backups for consistency - WAL between [start_lsn, stop_lsn]. + */ + for (i = 0; i < parray_num(timelineinfos); i++) + { + XLogSegNo anchor_segno = 0; + timelineInfo *tlinfo = parray_get(timelineinfos, i); + + /* + * At this point invalid anchor_lsn can be only in one case: + * timeline is going to be purged by regular WAL purge rules. 
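+ * None of its segments will get the 'keep' flag below, so they stay
+ * eligible for purge.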
+ */ + if (XLogRecPtrIsInvalid(tlinfo->anchor_lsn)) + continue; + + /* + * anchor_lsn is located in another timeline, it means that the timeline + * will be protected from purge entirely. + */ + if (tlinfo->anchor_tli > 0 && tlinfo->anchor_tli != tlinfo->tli) + continue; + + GetXLogSegNo(tlinfo->anchor_lsn, anchor_segno, instance->xlog_seg_size); + + for (j = 0; j < parray_num(tlinfo->xlog_filelist); j++) + { + xlogFile *wal_file = (xlogFile *) parray_get(tlinfo->xlog_filelist, j); + + if (wal_file->segno >= anchor_segno) + { + wal_file->keep = true; + continue; + } + + /* no keep segments */ + if (!tlinfo->keep_segments) + continue; + + /* Protect segments belonging to one of the keep invervals */ + for (k = 0; k < parray_num(tlinfo->keep_segments); k++) + { + xlogInterval *keep_segments = (xlogInterval *) parray_get(tlinfo->keep_segments, k); + + if ((wal_file->segno >= keep_segments->begin_segno) && + wal_file->segno <= keep_segments->end_segno) + { + wal_file->keep = true; + break; + } + } + } + } + + return timelineinfos; +} + +/* + * Iterate over parent timelines and look for valid backup + * closest to given timeline switchpoint. + * + * If such backup doesn't exist, it means that + * timeline is unreachable. Return NULL. + */ +pgBackup* +get_closest_backup(timelineInfo *tlinfo) +{ + pgBackup *closest_backup = NULL; + int i; + + /* + * Iterate over backups belonging to parent timelines + * and look for candidates. + */ + while (tlinfo->parent_link && !closest_backup) + { + parray *backup_list = tlinfo->parent_link->backups; + if (backup_list != NULL) { - if (requested_backup_id != INVALID_BACKUP_ID - && requested_backup_id != backup->start_time) + for (i = 0; i < parray_num(backup_list); i++) { - pgBackupFree(backup); - continue; + pgBackup *backup = parray_get(backup_list, i); + + /* + * Only valid backups made before switchpoint + * should be considered. + */ + if (!XLogRecPtrIsInvalid(backup->stop_lsn) && + XRecOffIsValid(backup->stop_lsn) && + backup->stop_lsn <= tlinfo->switchpoint && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) + { + /* Check if backup is closer to switchpoint than current candidate */ + if (!closest_backup || backup->stop_lsn > closest_backup->stop_lsn) + closest_backup = backup; + } } - parray_append(backups, backup); - backup = NULL; } - if (errno && errno != ENOENT) - { - elog(WARNING, "cannot read data directory \"%s\": %s", - data_ent->d_name, strerror(errno)); - goto err_proc; - } - } - if (errno) - { - elog(WARNING, "cannot read backup root directory \"%s\": %s", - backup_instance_path, strerror(errno)); - goto err_proc; + /* Continue with parent */ + tlinfo = tlinfo->parent_link; } - closedir(data_dir); - data_dir = NULL; + return closest_backup; +} - parray_qsort(backups, pgBackupCompareIdDesc); +/* + * Find oldest backup in given timeline + * to determine what WAL segments of this timeline + * are reachable from backups belonging to it. + * + * If such backup doesn't exist, it means that + * there is no backups on this timeline. Return NULL. 
+ */ +pgBackup* +get_oldest_backup(timelineInfo *tlinfo) +{ + pgBackup *oldest_backup = NULL; + int i; + parray *backup_list = tlinfo->backups; - /* Link incremental backups with their ancestors.*/ - for (i = 0; i < parray_num(backups); i++) + if (backup_list != NULL) { - pgBackup *curr = parray_get(backups, i); - - int j; - - if (curr->backup_mode == BACKUP_MODE_FULL) - continue; - - for (j = i+1; j < parray_num(backups); j++) + for (i = 0; i < parray_num(backup_list); i++) { - pgBackup *ancestor = parray_get(backups, j); + pgBackup *backup = parray_get(backup_list, i); - if (ancestor->start_time == curr->parent_backup) - { - curr->parent_backup_link = ancestor; - /* elog(INFO, "curr %s, ancestor %s j=%d", base36enc_dup(curr->start_time), - base36enc_dup(ancestor->start_time), j); */ - break; - } + /* Backups with invalid START LSN can be safely skipped */ + if (XLogRecPtrIsInvalid(backup->start_lsn) || + !XRecOffIsValid(backup->start_lsn)) + continue; + + /* + * Check if backup is older than current candidate. + * Here we use start_lsn for comparison, because backup that + * started earlier needs more WAL. + */ + if (!oldest_backup || backup->start_lsn < oldest_backup->start_lsn) + oldest_backup = backup; } } - return backups; + return oldest_backup; +} -err_proc: - if (data_dir) - closedir(data_dir); - if (backup) - pgBackupFree(backup); - if (backups) - parray_walk(backups, pgBackupFree); - parray_free(backups); +/* + * Overwrite backup metadata. + */ +void +do_set_backup(const char *instance_name, time_t backup_id, + pgSetBackupParams *set_backup_params) +{ + pgBackup *target_backup = NULL; + parray *backup_list = NULL; - elog(ERROR, "Failed to get backup list"); + if (!set_backup_params) + elog(ERROR, "Nothing to set by 'set-backup' command"); - return NULL; + backup_list = catalog_get_backup_list(instance_name, backup_id); + if (parray_num(backup_list) != 1) + elog(ERROR, "Failed to find backup %s", base36enc(backup_id)); + + target_backup = (pgBackup *) parray_get(backup_list, 0); + + /* Pin or unpin backup if requested */ + if (set_backup_params->ttl >= 0 || set_backup_params->expire_time > 0) + pin_backup(target_backup, set_backup_params); + + if (set_backup_params->note) + add_note(target_backup, set_backup_params->note); } /* - * Find the last completed backup on given timeline + * Set 'expire-time' attribute based on set_backup_params, or unpin backup + * if ttl is equal to zero. */ -pgBackup * -catalog_get_last_data_backup(parray *backup_list, TimeLineID tli) +void +pin_backup(pgBackup *target_backup, pgSetBackupParams *set_backup_params) { - int i; - pgBackup *backup = NULL; - /* backup_list is sorted in order of descending ID */ - for (i = 0; i < parray_num(backup_list); i++) + /* sanity, backup must have positive recovery-time */ + if (target_backup->recovery_time <= 0) + elog(ERROR, "Failed to set 'expire-time' for backup %s: invalid 'recovery-time'", + base36enc(target_backup->backup_id)); + + /* Pin comes from ttl */ + if (set_backup_params->ttl > 0) + target_backup->expire_time = target_backup->recovery_time + set_backup_params->ttl; + /* Unpin backup */ + else if (set_backup_params->ttl == 0) + { + /* If backup was not pinned in the first place, + * then there is nothing to unpin. 
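+ * To recap the branches in this function: a positive ttl pins the backup
+ * until recovery_time + ttl (a hypothetical ttl of 86400 keeps it for one
+ * day past its recovery time), ttl == 0 removes an existing pin, and a
+ * positive expire_time pins it until that absolute timestamp.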
+ */ + if (target_backup->expire_time == 0) + { + elog(WARNING, "Backup %s is not pinned, nothing to unpin", + base36enc(target_backup->start_time)); + return; + } + target_backup->expire_time = 0; + } + /* Pin comes from expire-time */ + else if (set_backup_params->expire_time > 0) + target_backup->expire_time = set_backup_params->expire_time; + else + /* nothing to do */ + return; + + /* Update backup.control */ + write_backup(target_backup, true); + + if (set_backup_params->ttl > 0 || set_backup_params->expire_time > 0) { - backup = (pgBackup *) parray_get(backup_list, (size_t) i); + char expire_timestamp[100]; - if (backup->status == BACKUP_STATUS_OK && backup->tli == tli) - return backup; + time2iso(expire_timestamp, lengthof(expire_timestamp), target_backup->expire_time); + elog(INFO, "Backup %s is pinned until '%s'", base36enc(target_backup->start_time), + expire_timestamp); } + else + elog(INFO, "Backup %s is unpinned", base36enc(target_backup->start_time)); - return NULL; + return; } -/* create backup directory in $BACKUP_PATH */ -int -pgBackupCreateDir(pgBackup *backup) +/* + * Add note to backup metadata or unset already existing note. + * It is a job of the caller to make sure that note is not NULL. + */ +void +add_note(pgBackup *target_backup, char *note) { - int i; - char path[MAXPGPATH]; - char *subdirs[] = { DATABASE_DIR, NULL }; - - pgBackupGetPath(backup, path, lengthof(path), NULL); - if (!dir_is_empty(path)) - elog(ERROR, "backup destination is not empty \"%s\"", path); - - dir_create_dir(path, DIR_PERMISSION); + char *note_string; - /* create directories for actual backup files */ - for (i = 0; subdirs[i]; i++) + /* unset note */ + if (pg_strcasecmp(note, "none") == 0) + { + target_backup->note = NULL; + elog(INFO, "Removing note from backup %s", + base36enc(target_backup->start_time)); + } + else { - pgBackupGetPath(backup, path, lengthof(path), subdirs[i]); - dir_create_dir(path, DIR_PERMISSION); + /* Currently we do not allow string with newlines as note, + * because it will break parsing of backup.control. + * So if user provides string like this "aaa\nbbbbb", + * we save only "aaa" + * Example: tests.set_backup.SetBackupTest.test_add_note_newlines + */ + note_string = pgut_malloc(MAX_NOTE_SIZE); + sscanf(note, "%[^\n]", note_string); + + target_backup->note = note_string; + elog(INFO, "Adding note to backup %s: '%s'", + base36enc(target_backup->start_time), target_backup->note); } - return 0; + /* Update backup.control */ + write_backup(target_backup, true); } /* @@ -411,108 +1683,277 @@ pgBackupWriteControl(FILE *out, pgBackup *backup) { char timestamp[100]; - fprintf(out, "#Configuration\n"); - fprintf(out, "backup-mode = %s\n", pgBackupGetBackupMode(backup)); - fprintf(out, "stream = %s\n", backup->stream ? "true" : "false"); - fprintf(out, "compress-alg = %s\n", + fio_fprintf(out, "#Configuration\n"); + fio_fprintf(out, "backup-mode = %s\n", pgBackupGetBackupMode(backup)); + fio_fprintf(out, "stream = %s\n", backup->stream ? "true" : "false"); + fio_fprintf(out, "compress-alg = %s\n", deparse_compress_alg(backup->compress_alg)); - fprintf(out, "compress-level = %d\n", backup->compress_level); - fprintf(out, "from-replica = %s\n", backup->from_replica ? 
"true" : "false"); - - fprintf(out, "\n#Compatibility\n"); - fprintf(out, "block-size = %u\n", backup->block_size); - fprintf(out, "xlog-block-size = %u\n", backup->wal_block_size); - fprintf(out, "checksum-version = %u\n", backup->checksum_version); - fprintf(out, "program-version = %s\n", PROGRAM_VERSION); + fio_fprintf(out, "compress-level = %d\n", backup->compress_level); + fio_fprintf(out, "from-replica = %s\n", backup->from_replica ? "true" : "false"); + + fio_fprintf(out, "\n#Compatibility\n"); + fio_fprintf(out, "block-size = %u\n", backup->block_size); + fio_fprintf(out, "xlog-block-size = %u\n", backup->wal_block_size); + fio_fprintf(out, "checksum-version = %u\n", backup->checksum_version); + if (backup->program_version[0] != '\0') + fio_fprintf(out, "program-version = %s\n", backup->program_version); if (backup->server_version[0] != '\0') - fprintf(out, "server-version = %s\n", backup->server_version); + fio_fprintf(out, "server-version = %s\n", backup->server_version); - fprintf(out, "\n#Result backup info\n"); - fprintf(out, "timelineid = %d\n", backup->tli); + fio_fprintf(out, "\n#Result backup info\n"); + fio_fprintf(out, "timelineid = %d\n", backup->tli); /* LSN returned by pg_start_backup */ - fprintf(out, "start-lsn = %X/%X\n", + fio_fprintf(out, "start-lsn = %X/%X\n", (uint32) (backup->start_lsn >> 32), (uint32) backup->start_lsn); /* LSN returned by pg_stop_backup */ - fprintf(out, "stop-lsn = %X/%X\n", + fio_fprintf(out, "stop-lsn = %X/%X\n", (uint32) (backup->stop_lsn >> 32), (uint32) backup->stop_lsn); time2iso(timestamp, lengthof(timestamp), backup->start_time); - fprintf(out, "start-time = '%s'\n", timestamp); + fio_fprintf(out, "start-time = '%s'\n", timestamp); + if (backup->merge_time > 0) + { + time2iso(timestamp, lengthof(timestamp), backup->merge_time); + fio_fprintf(out, "merge-time = '%s'\n", timestamp); + } if (backup->end_time > 0) { time2iso(timestamp, lengthof(timestamp), backup->end_time); - fprintf(out, "end-time = '%s'\n", timestamp); + fio_fprintf(out, "end-time = '%s'\n", timestamp); } - fprintf(out, "recovery-xid = " XID_FMT "\n", backup->recovery_xid); + fio_fprintf(out, "recovery-xid = " XID_FMT "\n", backup->recovery_xid); if (backup->recovery_time > 0) { time2iso(timestamp, lengthof(timestamp), backup->recovery_time); - fprintf(out, "recovery-time = '%s'\n", timestamp); + fio_fprintf(out, "recovery-time = '%s'\n", timestamp); + } + if (backup->expire_time > 0) + { + time2iso(timestamp, lengthof(timestamp), backup->expire_time); + fio_fprintf(out, "expire-time = '%s'\n", timestamp); } + if (backup->merge_dest_backup != 0) + fio_fprintf(out, "merge-dest-id = '%s'\n", base36enc(backup->merge_dest_backup)); + /* * Size of PGDATA directory. The size does not include size of related * WAL segments in archive 'wal' directory. 
*/ if (backup->data_bytes != BYTES_INVALID) - fprintf(out, "data-bytes = " INT64_FORMAT "\n", backup->data_bytes); + fio_fprintf(out, "data-bytes = " INT64_FORMAT "\n", backup->data_bytes); if (backup->wal_bytes != BYTES_INVALID) - fprintf(out, "wal-bytes = " INT64_FORMAT "\n", backup->wal_bytes); + fio_fprintf(out, "wal-bytes = " INT64_FORMAT "\n", backup->wal_bytes); + + if (backup->uncompressed_bytes >= 0) + fio_fprintf(out, "uncompressed-bytes = " INT64_FORMAT "\n", backup->uncompressed_bytes); + + if (backup->pgdata_bytes >= 0) + fio_fprintf(out, "pgdata-bytes = " INT64_FORMAT "\n", backup->pgdata_bytes); - fprintf(out, "status = %s\n", status2str(backup->status)); + fio_fprintf(out, "status = %s\n", status2str(backup->status)); /* 'parent_backup' is set if it is incremental backup */ if (backup->parent_backup != 0) - fprintf(out, "parent-backup-id = '%s'\n", base36enc(backup->parent_backup)); + fio_fprintf(out, "parent-backup-id = '%s'\n", base36enc(backup->parent_backup)); /* print connection info except password */ if (backup->primary_conninfo) - fprintf(out, "primary_conninfo = '%s'\n", backup->primary_conninfo); + fio_fprintf(out, "primary_conninfo = '%s'\n", backup->primary_conninfo); + + /* print external directories list */ + if (backup->external_dir_str) + fio_fprintf(out, "external-dirs = '%s'\n", backup->external_dir_str); + + if (backup->note) + fio_fprintf(out, "note = '%s'\n", backup->note); + + if (backup->content_crc != 0) + fio_fprintf(out, "content-crc = %u\n", backup->content_crc); + } -/* create BACKUP_CONTROL_FILE */ +/* + * Save the backup content into BACKUP_CONTROL_FILE. + * TODO: honor the strict flag + */ void -pgBackupWriteBackupControlFile(pgBackup *backup) +write_backup(pgBackup *backup, bool strict) { FILE *fp = NULL; - char ini_path[MAXPGPATH]; + char path[MAXPGPATH]; + char path_temp[MAXPGPATH]; + char buf[4096]; - pgBackupGetPath(backup, ini_path, lengthof(ini_path), BACKUP_CONTROL_FILE); - fp = fopen(ini_path, "wt"); + join_path_components(path, backup->root_dir, BACKUP_CONTROL_FILE); + snprintf(path_temp, sizeof(path_temp), "%s.tmp", path); + + fp = fopen(path_temp, PG_BINARY_W); if (fp == NULL) - elog(ERROR, "cannot open configuration file \"%s\": %s", ini_path, - strerror(errno)); + elog(ERROR, "Cannot open control file \"%s\": %s", + path_temp, strerror(errno)); + + if (chmod(path_temp, FILE_PERMISSION) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", path_temp, + strerror(errno)); + + setvbuf(fp, buf, _IOFBF, sizeof(buf)); pgBackupWriteControl(fp, backup); - fclose(fp); + if (fflush(fp) != 0) + elog(ERROR, "Cannot flush control file \"%s\": %s", + path_temp, strerror(errno)); + + if (fsync(fileno(fp)) < 0) + elog(ERROR, "Cannot sync control file \"%s\": %s", + path_temp, strerror(errno)); + + if (fclose(fp) != 0) + elog(ERROR, "Cannot close control file \"%s\": %s", + path_temp, strerror(errno)); + + if (rename(path_temp, path) < 0) + elog(ERROR, "Cannot rename file \"%s\" to \"%s\": %s", + path_temp, path, strerror(errno)); } /* * Output the list of files to backup catalog DATABASE_FILE_LIST */ void -pgBackupWriteFileList(pgBackup *backup, parray *files, const char *root) +write_backup_filelist(pgBackup *backup, parray *files, const char *root, + parray *external_list, bool sync) { - FILE *fp; - char path[MAXPGPATH]; + FILE *out; + char control_path[MAXPGPATH]; + char control_path_temp[MAXPGPATH]; + size_t i = 0; + #define BUFFERSZ 1024*1024 + char *buf; + int64 backup_size_on_disk = 0; + int64 uncompressed_size_on_disk = 0; + int64 
wal_size_on_disk = 0; + + join_path_components(control_path, backup->root_dir, DATABASE_FILE_LIST); + snprintf(control_path_temp, sizeof(control_path_temp), "%s.tmp", control_path); + + out = fopen(control_path_temp, PG_BINARY_W); + if (out == NULL) + elog(ERROR, "Cannot open file list \"%s\": %s", control_path_temp, + strerror(errno)); + + if (chmod(control_path_temp, FILE_PERMISSION) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", control_path_temp, + strerror(errno)); + + buf = pgut_malloc(BUFFERSZ); + setvbuf(out, buf, _IOFBF, BUFFERSZ); + + if (sync) + INIT_FILE_CRC32(true, backup->content_crc); + + /* print each file in the list */ + for (i = 0; i < parray_num(files); i++) + { + int len = 0; + char line[BLCKSZ]; + pgFile *file = (pgFile *) parray_get(files, i); - pgBackupGetPath(backup, path, lengthof(path), DATABASE_FILE_LIST); + /* Ignore disappeared file */ + if (file->write_size == FILE_NOT_FOUND) + continue; - fp = fopen(path, "wt"); - if (fp == NULL) - elog(ERROR, "cannot open file list \"%s\": %s", path, - strerror(errno)); + if (S_ISDIR(file->mode)) + { + backup_size_on_disk += 4096; + uncompressed_size_on_disk += 4096; + } + + /* Count the amount of the data actually copied */ + if (S_ISREG(file->mode) && file->write_size > 0) + { + /* + * Size of WAL files in 'pg_wal' is counted separately + * TODO: in 3.0 add attribute is_walfile + */ + if (IsXLogFileName(file->name) && file->external_dir_num == 0) + wal_size_on_disk += file->write_size; + else + { + backup_size_on_disk += file->write_size; + uncompressed_size_on_disk += file->uncompressed_size; + } + } + + len = sprintf(line, "{\"path\":\"%s\", \"size\":\"" INT64_FORMAT "\", " + "\"mode\":\"%u\", \"is_datafile\":\"%u\", " + "\"is_cfs\":\"%u\", \"crc\":\"%u\", " + "\"compress_alg\":\"%s\", \"external_dir_num\":\"%d\", " + "\"dbOid\":\"%u\"", + file->rel_path, file->write_size, file->mode, + file->is_datafile ? 1 : 0, + file->is_cfs ? 
1 : 0, + file->crc, + deparse_compress_alg(file->compress_alg), + file->external_dir_num, + file->dbOid); + + if (file->is_datafile) + len += sprintf(line+len, ",\"segno\":\"%d\"", file->segno); + + if (file->linked) + len += sprintf(line+len, ",\"linked\":\"%s\"", file->linked); + + if (file->n_blocks > 0) + len += sprintf(line+len, ",\"n_blocks\":\"%i\"", file->n_blocks); + + if (file->n_headers > 0) + { + len += sprintf(line+len, ",\"n_headers\":\"%i\"", file->n_headers); + len += sprintf(line+len, ",\"hdr_crc\":\"%u\"", file->hdr_crc); + len += sprintf(line+len, ",\"hdr_off\":\"%li\"", file->hdr_off); + len += sprintf(line+len, ",\"hdr_size\":\"%i\"", file->hdr_size); + } + + sprintf(line+len, "}\n"); + + if (sync) + COMP_FILE_CRC32(true, backup->content_crc, line, strlen(line)); + + fprintf(out, "%s", line); + } + + if (sync) + FIN_FILE_CRC32(true, backup->content_crc); + + if (fflush(out) != 0) + elog(ERROR, "Cannot flush file list \"%s\": %s", + control_path_temp, strerror(errno)); + + if (sync && fsync(fileno(out)) < 0) + elog(ERROR, "Cannot sync file list \"%s\": %s", + control_path_temp, strerror(errno)); + + if (fclose(out) != 0) + elog(ERROR, "Cannot close file list \"%s\": %s", + control_path_temp, strerror(errno)); - print_file_list(fp, files, root); + if (rename(control_path_temp, control_path) < 0) + elog(ERROR, "Cannot rename file \"%s\" to \"%s\": %s", + control_path_temp, control_path, strerror(errno)); - if (fflush(fp) != 0 || - fsync(fileno(fp)) != 0 || - fclose(fp)) - elog(ERROR, "cannot write file list \"%s\": %s", path, strerror(errno)); + /* use extra variable to avoid reset of previous data_bytes value in case of error */ + backup->data_bytes = backup_size_on_disk; + backup->uncompressed_bytes = uncompressed_size_on_disk; + + if (backup->stream) + backup->wal_bytes = wal_size_on_disk; + + free(buf); } /* @@ -529,23 +1970,28 @@ readBackupControlFile(const char *path) char *stop_lsn = NULL; char *status = NULL; char *parent_backup = NULL; + char *merge_dest_backup = NULL; char *program_version = NULL; char *server_version = NULL; char *compress_alg = NULL; int parsed_options; - pgut_option options[] = + ConfigOption options[] = { {'s', 0, "backup-mode", &backup_mode, SOURCE_FILE_STRICT}, {'u', 0, "timelineid", &backup->tli, SOURCE_FILE_STRICT}, {'s', 0, "start-lsn", &start_lsn, SOURCE_FILE_STRICT}, {'s', 0, "stop-lsn", &stop_lsn, SOURCE_FILE_STRICT}, {'t', 0, "start-time", &backup->start_time, SOURCE_FILE_STRICT}, + {'t', 0, "merge-time", &backup->merge_time, SOURCE_FILE_STRICT}, {'t', 0, "end-time", &backup->end_time, SOURCE_FILE_STRICT}, {'U', 0, "recovery-xid", &backup->recovery_xid, SOURCE_FILE_STRICT}, {'t', 0, "recovery-time", &backup->recovery_time, SOURCE_FILE_STRICT}, + {'t', 0, "expire-time", &backup->expire_time, SOURCE_FILE_STRICT}, {'I', 0, "data-bytes", &backup->data_bytes, SOURCE_FILE_STRICT}, {'I', 0, "wal-bytes", &backup->wal_bytes, SOURCE_FILE_STRICT}, + {'I', 0, "uncompressed-bytes", &backup->uncompressed_bytes, SOURCE_FILE_STRICT}, + {'I', 0, "pgdata-bytes", &backup->pgdata_bytes, SOURCE_FILE_STRICT}, {'u', 0, "block-size", &backup->block_size, SOURCE_FILE_STRICT}, {'u', 0, "xlog-block-size", &backup->wal_block_size, SOURCE_FILE_STRICT}, {'u', 0, "checksum-version", &backup->checksum_version, SOURCE_FILE_STRICT}, @@ -554,33 +2000,37 @@ readBackupControlFile(const char *path) {'b', 0, "stream", &backup->stream, SOURCE_FILE_STRICT}, {'s', 0, "status", &status, SOURCE_FILE_STRICT}, {'s', 0, "parent-backup-id", &parent_backup, SOURCE_FILE_STRICT}, + 
{'s', 0, "merge-dest-id", &merge_dest_backup, SOURCE_FILE_STRICT}, {'s', 0, "compress-alg", &compress_alg, SOURCE_FILE_STRICT}, {'u', 0, "compress-level", &backup->compress_level, SOURCE_FILE_STRICT}, {'b', 0, "from-replica", &backup->from_replica, SOURCE_FILE_STRICT}, {'s', 0, "primary-conninfo", &backup->primary_conninfo, SOURCE_FILE_STRICT}, + {'s', 0, "external-dirs", &backup->external_dir_str, SOURCE_FILE_STRICT}, + {'s', 0, "note", &backup->note, SOURCE_FILE_STRICT}, + {'u', 0, "content-crc", &backup->content_crc, SOURCE_FILE_STRICT}, {0} }; - if (access(path, F_OK) != 0) + pgBackupInit(backup); + if (fio_access(path, F_OK, FIO_BACKUP_HOST) != 0) { - elog(WARNING, "control file \"%s\" doesn't exist", path); + elog(WARNING, "Control file \"%s\" doesn't exist", path); pgBackupFree(backup); return NULL; } - pgBackupInit(backup); - parsed_options = pgut_readopt(path, options, WARNING); + parsed_options = config_read_opt(path, options, WARNING, true, true); if (parsed_options == 0) { - elog(WARNING, "control file \"%s\" is empty", path); + elog(WARNING, "Control file \"%s\" is empty", path); pgBackupFree(backup); return NULL; } if (backup->start_time == 0) { - elog(WARNING, "invalid ID/start-time, control file \"%s\" is corrupted", path); + elog(WARNING, "Invalid ID/start-time, control file \"%s\" is corrupted", path); pgBackupFree(backup); return NULL; } @@ -599,7 +2049,7 @@ readBackupControlFile(const char *path) if (sscanf(start_lsn, "%X/%X", &xlogid, &xrecoff) == 2) backup->start_lsn = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; else - elog(WARNING, "invalid START_LSN \"%s\"", start_lsn); + elog(WARNING, "Invalid START_LSN \"%s\"", start_lsn); free(start_lsn); } @@ -611,7 +2061,7 @@ readBackupControlFile(const char *path) if (sscanf(stop_lsn, "%X/%X", &xlogid, &xrecoff) == 2) backup->stop_lsn = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; else - elog(WARNING, "invalid STOP_LSN \"%s\"", stop_lsn); + elog(WARNING, "Invalid STOP_LSN \"%s\"", stop_lsn); free(stop_lsn); } @@ -625,6 +2075,8 @@ readBackupControlFile(const char *path) backup->status = BACKUP_STATUS_RUNNING; else if (strcmp(status, "MERGING") == 0) backup->status = BACKUP_STATUS_MERGING; + else if (strcmp(status, "MERGED") == 0) + backup->status = BACKUP_STATUS_MERGED; else if (strcmp(status, "DELETING") == 0) backup->status = BACKUP_STATUS_DELETING; else if (strcmp(status, "DELETED") == 0) @@ -636,7 +2088,7 @@ readBackupControlFile(const char *path) else if (strcmp(status, "CORRUPT") == 0) backup->status = BACKUP_STATUS_CORRUPT; else - elog(WARNING, "invalid STATUS \"%s\"", status); + elog(WARNING, "Invalid STATUS \"%s\"", status); free(status); } @@ -646,6 +2098,12 @@ readBackupControlFile(const char *path) free(parent_backup); } + if (merge_dest_backup) + { + backup->merge_dest_backup = base36dec(merge_dest_backup); + free(merge_dest_backup); + } + if (program_version) { StrNCpy(backup->program_version, program_version, @@ -722,7 +2180,7 @@ parse_compress_alg(const char *arg) len = strlen(arg); if (len == 0) - elog(ERROR, "compress algrorithm is empty"); + elog(ERROR, "compress algorithm is empty"); if (pg_strncasecmp("zlib", arg, len) == 0) return ZLIB_COMPRESS; @@ -753,6 +2211,27 @@ deparse_compress_alg(int alg) return NULL; } +/* + * Fill PGNodeInfo struct with default values. 
+ */ +void +pgNodeInit(PGNodeInfo *node) +{ + node->block_size = 0; + node->wal_block_size = 0; + node->checksum_version = 0; + + node->is_superuser = false; + node->pgpro_support = false; + + node->server_version = 0; + node->server_version_str[0] = '\0'; + + node->ptrack_version_num = 0; + node->is_ptrack_enable = false; + node->ptrack_schema = NULL; +} + /* * Fill pgBackup struct with default values. */ @@ -766,12 +2245,16 @@ pgBackupInit(pgBackup *backup) backup->start_lsn = 0; backup->stop_lsn = 0; backup->start_time = (time_t) 0; + backup->merge_time = (time_t) 0; backup->end_time = (time_t) 0; backup->recovery_xid = 0; backup->recovery_time = (time_t) 0; + backup->expire_time = (time_t) 0; backup->data_bytes = BYTES_INVALID; backup->wal_bytes = BYTES_INVALID; + backup->uncompressed_bytes = 0; + backup->pgdata_bytes = 0; backup->compress_alg = COMPRESS_ALG_DEFAULT; backup->compress_level = COMPRESS_LEVEL_DEFAULT; @@ -783,24 +2266,17 @@ pgBackupInit(pgBackup *backup) backup->stream = false; backup->from_replica = false; backup->parent_backup = INVALID_BACKUP_ID; + backup->merge_dest_backup = INVALID_BACKUP_ID; backup->parent_backup_link = NULL; backup->primary_conninfo = NULL; backup->program_version[0] = '\0'; backup->server_version[0] = '\0'; -} - -/* - * Copy backup metadata from **src** into **dst**. - */ -void -pgBackupCopy(pgBackup *dst, pgBackup *src) -{ - pfree(dst->primary_conninfo); - - memcpy(dst, src, sizeof(pgBackup)); - - if (src->primary_conninfo) - dst->primary_conninfo = pstrdup(src->primary_conninfo); + backup->external_dir_str = NULL; + backup->root_dir = NULL; + backup->database_dir = NULL; + backup->files = NULL; + backup->note = NULL; + backup->content_crc = 0; } /* free pgBackup object */ @@ -809,8 +2285,12 @@ pgBackupFree(void *backup) { pgBackup *b = (pgBackup *) backup; - pfree(b->primary_conninfo); - pfree(backup); + pg_free(b->primary_conninfo); + pg_free(b->external_dir_str); + pg_free(b->root_dir); + pg_free(b->database_dir); + pg_free(b->note); + pg_free(backup); } /* Compare two pgBackup with their IDs (start time) in ascending order */ @@ -864,51 +2344,220 @@ pgBackupGetPath2(const pgBackup *backup, char *path, size_t len, else snprintf(path, len, "%s/%s/%s/%s", backup_instance_path, base36enc(backup->start_time), subdir1, subdir2); +} + +/* + * independent from global variable backup_instance_path + * Still depends from backup_path + */ +void +pgBackupGetPathInInstance(const char *instance_name, + const pgBackup *backup, char *path, size_t len, + const char *subdir1, const char *subdir2) +{ + char backup_instance_path[MAXPGPATH]; + + sprintf(backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); - make_native_path(path); + /* If "subdir1" is NULL do not check "subdir2" */ + if (!subdir1) + snprintf(path, len, "%s/%s", backup_instance_path, + base36enc(backup->start_time)); + else if (!subdir2) + snprintf(path, len, "%s/%s/%s", backup_instance_path, + base36enc(backup->start_time), subdir1); + /* "subdir1" and "subdir2" is not NULL */ + else + snprintf(path, len, "%s/%s/%s/%s", backup_instance_path, + base36enc(backup->start_time), subdir1, subdir2); } -/* Find parent base FULL backup for current backup using parent_backup_link, - * return NULL if not found +/* + * Check if multiple backups consider target backup to be their direct parent */ -pgBackup* -find_parent_backup(pgBackup *current_backup) +bool +is_prolific(parray *backup_list, pgBackup *target_backup) { - pgBackup *base_full_backup = NULL; - base_full_backup = 
current_backup; + int i; + int child_counter = 0; - while (base_full_backup->backup_mode != BACKUP_MODE_FULL) + for (i = 0; i < parray_num(backup_list); i++) { - /* - * If we haven't found parent for incremental backup, - * mark it and all depending backups as orphaned - */ - if (base_full_backup->parent_backup_link == NULL - || (base_full_backup->status != BACKUP_STATUS_OK - && base_full_backup->status != BACKUP_STATUS_DONE)) + pgBackup *tmp_backup = (pgBackup *) parray_get(backup_list, i); + + /* consider only OK and DONE backups */ + if (tmp_backup->parent_backup == target_backup->start_time && + (tmp_backup->status == BACKUP_STATUS_OK || + tmp_backup->status == BACKUP_STATUS_DONE)) { - pgBackup *orphaned_backup = current_backup; + child_counter++; + if (child_counter > 1) + return true; + } + } - while (orphaned_backup != NULL) - { - orphaned_backup->status = BACKUP_STATUS_ORPHAN; - pgBackupWriteBackupControlFile(orphaned_backup); - if (base_full_backup->parent_backup_link == NULL) - elog(WARNING, "Backup %s is orphaned because its parent backup is not found", - base36enc(orphaned_backup->start_time)); - else - elog(WARNING, "Backup %s is orphaned because its parent backup is corrupted", - base36enc(orphaned_backup->start_time)); + return false; +} - orphaned_backup = orphaned_backup->parent_backup_link; - } +/* + * Find parent base FULL backup for current backup using parent_backup_link + */ +pgBackup* +find_parent_full_backup(pgBackup *current_backup) +{ + pgBackup *base_full_backup = NULL; + base_full_backup = current_backup; - base_full_backup = NULL; - break; - } + /* sanity */ + if (!current_backup) + elog(ERROR, "Target backup cannot be NULL"); + while (base_full_backup->parent_backup_link != NULL) + { base_full_backup = base_full_backup->parent_backup_link; } + if (base_full_backup->backup_mode != BACKUP_MODE_FULL) + { + if (base_full_backup->parent_backup) + elog(WARNING, "Backup %s is missing", + base36enc(base_full_backup->parent_backup)); + else + elog(WARNING, "Failed to find parent FULL backup for %s", + base36enc(current_backup->start_time)); + return NULL; + } + return base_full_backup; } + +/* + * Iterate over parent chain and look for any problems. + * Return 0 if chain is broken. + * result_backup must contain oldest existing backup after missing backup. + * we have no way to know if there are multiple missing backups. + * Return 1 if chain is intact, but at least one backup is !OK. + * result_backup must contain oldest !OK backup. + * Return 2 if chain is intact and all backups are OK. + * result_backup must contain FULL backup on which chain is based. 
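Aside: the three return codes documented above (ChainIsBroken, ChainIsInvalid, ChainIsOk) are meant to be switched on by callers such as restore or validate. A hedged sketch of such a caller; the wrapper function itself is illustrative, only the return-code handling mirrors the contract described in the comment:

```c
/* Illustrative caller (assumes pg_probackup.h context): decide whether
 * dest_backup's parent chain allows a restore. */
static void
check_chain_for_restore(pgBackup *dest_backup)
{
    pgBackup *tmp_backup = NULL;
    int       result = scan_parent_chain(dest_backup, &tmp_backup);

    if (result == ChainIsBroken)
        /* tmp_backup is the oldest backup that still exists after the gap */
        elog(ERROR, "Parent of backup %s is missing",
             base36enc(tmp_backup->start_time));
    else if (result == ChainIsInvalid)
        /* tmp_backup is the oldest backup in the chain whose status is not OK/DONE */
        elog(ERROR, "Backup %s has invalid status %s",
             base36enc(tmp_backup->start_time), status2str(tmp_backup->status));
    else
        /* ChainIsOk: tmp_backup is the FULL backup the chain is based on */
        elog(INFO, "Parent chain of backup %s is intact",
             base36enc(dest_backup->start_time));
}
```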
+ */ +int +scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup) +{ + pgBackup *target_backup = NULL; + pgBackup *invalid_backup = NULL; + + if (!current_backup) + elog(ERROR, "Target backup cannot be NULL"); + + target_backup = current_backup; + + while (target_backup->parent_backup_link) + { + if (target_backup->status != BACKUP_STATUS_OK && + target_backup->status != BACKUP_STATUS_DONE) + /* oldest invalid backup in parent chain */ + invalid_backup = target_backup; + + + target_backup = target_backup->parent_backup_link; + } + + /* Previous loop will skip FULL backup because his parent_backup_link is NULL */ + if (target_backup->backup_mode == BACKUP_MODE_FULL && + (target_backup->status != BACKUP_STATUS_OK && + target_backup->status != BACKUP_STATUS_DONE)) + { + invalid_backup = target_backup; + } + + /* found chain end and oldest backup is not FULL */ + if (target_backup->backup_mode != BACKUP_MODE_FULL) + { + /* Set oldest child backup in chain */ + *result_backup = target_backup; + return ChainIsBroken; + } + + /* chain is ok, but some backups are invalid */ + if (invalid_backup) + { + *result_backup = invalid_backup; + return ChainIsInvalid; + } + + *result_backup = target_backup; + return ChainIsOk; +} + +/* + * Determine if child_backup descend from parent_backup + * This check DO NOT(!!!) guarantee that parent chain is intact, + * because parent_backup can be missing. + * If inclusive is true, then child_backup counts as a child of himself + * if parent_backup_time is start_time of child_backup. + */ +bool +is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive) +{ + if (!child_backup) + elog(ERROR, "Target backup cannot be NULL"); + + if (inclusive && child_backup->start_time == parent_backup_time) + return true; + + while (child_backup->parent_backup_link && + child_backup->parent_backup != parent_backup_time) + { + child_backup = child_backup->parent_backup_link; + } + + if (child_backup->parent_backup == parent_backup_time) + return true; + + //if (inclusive && child_backup->start_time == parent_backup_time) + // return true; + + return false; +} + +/* + * Return backup index number. 
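Aside: the backup IDs printed in the messages above are simply base36enc(start_time). A standalone sketch of that encoding follows; it is an illustration only, not the project's own base36enc() helper, and the buffer handling is simplified:

```c
#include <stdio.h>
#include <time.h>

/* Encode a start_time into a base36 ID like those shown in log messages.
 * Simplified illustration, not the project's base36enc(). */
static char *
base36_id(time_t value, char *buf, size_t buflen)
{
    const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    char       tmp[16];
    int        i = 0, j = 0;
    unsigned long long v = (unsigned long long) value;

    do {
        tmp[i++] = digits[v % 36];
        v /= 36;
    } while (v > 0 && i < (int) sizeof(tmp));

    /* digits were produced least-significant first; reverse them */
    while (i > 0 && j < (int) buflen - 1)
        buf[j++] = tmp[--i];
    buf[j] = '\0';
    return buf;
}

int main(void)
{
    char id[16];
    /* hypothetical start_time */
    printf("backup ID: %s\n", base36_id((time_t) 1580000000, id, sizeof(id)));
    return 0;
}
```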
+ * Note: this index number holds true until new sorting of backup list + */ +int +get_backup_index_number(parray *backup_list, pgBackup *backup) +{ + int i; + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *tmp_backup = (pgBackup *) parray_get(backup_list, i); + + if (tmp_backup->start_time == backup->start_time) + return i; + } + elog(WARNING, "Failed to find backup %s", base36enc(backup->start_time)); + return -1; +} + +/* On backup_list lookup children of target_backup and append them to append_list */ +void +append_children(parray *backup_list, pgBackup *target_backup, parray *append_list) +{ + int i; + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* check if backup is descendant of target backup */ + if (is_parent(target_backup->start_time, backup, false)) + { + /* if backup is already in the list, then skip it */ + if (!parray_contains(append_list, backup)) + parray_append(append_list, backup); + } + } +} diff --git a/src/checkdb.c b/src/checkdb.c new file mode 100644 index 000000000..3a97fc2c7 --- /dev/null +++ b/src/checkdb.c @@ -0,0 +1,712 @@ +/*------------------------------------------------------------------------- + * + * src/checkdb.c + * pg_probackup checkdb subcommand + * + * It allows to validate all data files located in PGDATA + * via block checksums matching and page header sanity checks. + * Optionally all indexes in all databases in PostgreSQL + * instance can be logically verified using extensions + * amcheck or amcheck_next. + * + * Portions Copyright (c) 2019-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include +#include +#include + +#include "utils/thread.h" +#include "utils/file.h" + + +typedef struct +{ + /* list of files to validate */ + parray *files_list; + /* if page checksums are enabled in this postgres instance? */ + uint32 checksum_version; + /* + * conn and cancel_conn + * to use in check_data_file + * to connect to postgres if we've failed to validate page + * and want to read it via buffer cache to ensure + */ + ConnectionArgs conn_arg; + /* number of thread for debugging */ + int thread_num; + /* pgdata path */ + const char *from_root; + /* + * Return value from the thread: + * 0 everything is ok + * 1 thread errored during execution, e.g. interruption (default value) + * 2 corruption is definitely(!) found + */ + int ret; +} check_files_arg; + + +typedef struct +{ + /* list of indexes to amcheck */ + parray *index_list; + /* + * credentials to connect to postgres instance + * used for compatibility checks of blocksize, + * server version and so on + */ + ConnectionOptions conn_opt; + /* + * conn and cancel_conn + * to use in threads to connect to databases + */ + ConnectionArgs conn_arg; + /* number of thread for debugging */ + int thread_num; + /* + * Return value from the thread: + * 0 everything is ok + * 1 thread errored during execution, e.g. interruption (default value) + * 2 corruption is definitely(!) 
found + */ + int ret; +} check_indexes_arg; + +typedef struct pg_indexEntry +{ + Oid indexrelid; + char *name; + char *namespace; + bool heapallindexed_is_supported; + /* schema where amcheck extension is located */ + char *amcheck_nspname; + /* lock for synchronization of parallel threads */ + volatile pg_atomic_flag lock; +} pg_indexEntry; + +static void +pg_indexEntry_free(void *index) +{ + pg_indexEntry *index_ptr; + + if (index == NULL) + return; + + index_ptr = (pg_indexEntry *) index; + + if (index_ptr->name) + free(index_ptr->name); + if (index_ptr->name) + free(index_ptr->namespace); + if (index_ptr->amcheck_nspname) + free(index_ptr->amcheck_nspname); + + free(index_ptr); +} + + +static void *check_files(void *arg); +static void do_block_validation(char *pgdata, uint32 checksum_version); + +static void *check_indexes(void *arg); +static parray* get_index_list(const char *dbname, bool first_db_with_amcheck, + PGconn *db_conn); +static bool amcheck_one_index(check_indexes_arg *arguments, + pg_indexEntry *ind); +static void do_amcheck(ConnectionOptions conn_opt, PGconn *conn); + +/* + * Check files in PGDATA. + * Read all files listed in files_list. + * If the file is 'datafile' (regular relation's main fork), read it page by page, + * verify checksum and copy. + */ +static void * +check_files(void *arg) +{ + int i; + check_files_arg *arguments = (check_files_arg *) arg; + int n_files_list = 0; + char from_fullpath[MAXPGPATH]; + + if (arguments->files_list) + n_files_list = parray_num(arguments->files_list); + + /* check a file */ + for (i = 0; i < n_files_list; i++) + { + pgFile *file = (pgFile *) parray_get(arguments->files_list, i); + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "interrupted during checkdb"); + + /* No need to check directories */ + if (S_ISDIR(file->mode)) + continue; + + if (!pg_atomic_test_set_flag(&file->lock)) + continue; + + join_path_components(from_fullpath, arguments->from_root, file->rel_path); + + elog(VERBOSE, "Checking file: \"%s\" ", from_fullpath); + + if (progress) + elog(INFO, "Progress: (%d/%d). Process file \"%s\"", + i + 1, n_files_list, from_fullpath); + + if (S_ISREG(file->mode)) + { + /* check only uncompressed by cfs datafiles */ + if (file->is_datafile && !file->is_cfs) + { + /* + * TODO deep inside check_data_file + * uses global variables to set connections. + * Need refactoring. + */ + if (!check_data_file(&(arguments->conn_arg), + file, from_fullpath, + arguments->checksum_version)) + arguments->ret = 2; /* corruption found */ + } + } + else + elog(WARNING, "unexpected file type %d", file->mode); + } + + /* Ret values: + * 0 everything is ok + * 1 thread errored during execution, e.g. interruption (default value) + * 2 corruption is definitely(!) found + */ + if (arguments->ret == 1) + arguments->ret = 0; + + return NULL; +} + +/* collect list of files and run threads to check files in the instance */ +static void +do_block_validation(char *pgdata, uint32 checksum_version) +{ + int i; + /* arrays with meta info for multi threaded check */ + pthread_t *threads; + check_files_arg *threads_args; + bool check_isok = true; + parray *files_list = NULL; + + /* initialize file list */ + files_list = parray_new(); + + /* list files with the logical path. omit $PGDATA */ + dir_list_file(files_list, pgdata, true, true, + false, false, true, 0, FIO_DB_HOST); + + /* + * Sort pathname ascending. 
+ * + * For example: + * 1 - create 'base' + * 2 - create 'base/1' + */ + parray_qsort(files_list, pgFileCompareRelPathWithExternal); + /* Extract information about files in pgdata parsing their names:*/ + parse_filelist_filenames(files_list, pgdata); + + /* setup threads */ + for (i = 0; i < parray_num(files_list); i++) + { + pgFile *file = (pgFile *) parray_get(files_list, i); + pg_atomic_init_flag(&file->lock); + } + + /* Sort by size for load balancing */ + parray_qsort(files_list, pgFileCompareSize); + + /* init thread args with own file lists */ + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (check_files_arg *) palloc(sizeof(check_files_arg)*num_threads); + + for (i = 0; i < num_threads; i++) + { + check_files_arg *arg = &(threads_args[i]); + + arg->files_list = files_list; + arg->checksum_version = checksum_version; + arg->from_root = pgdata; + + arg->conn_arg.conn = NULL; + arg->conn_arg.cancel_conn = NULL; + + arg->thread_num = i + 1; + /* By default there is some error */ + arg->ret = 1; + } + + elog(INFO, "Start checking data files"); + + /* Run threads */ + for (i = 0; i < num_threads; i++) + { + check_files_arg *arg = &(threads_args[i]); + + elog(VERBOSE, "Start thread num: %i", i); + + pthread_create(&threads[i], NULL, check_files, arg); + } + + /* Wait threads */ + for (i = 0; i < num_threads; i++) + { + pthread_join(threads[i], NULL); + if (threads_args[i].ret > 0) + check_isok = false; + } + + /* cleanup */ + if (files_list) + { + parray_walk(files_list, pgFileFree); + parray_free(files_list); + files_list = NULL; + } + + if (check_isok) + elog(INFO, "Data files are valid"); + else + elog(ERROR, "Checkdb failed"); +} + +/* Check indexes with amcheck */ +static void * +check_indexes(void *arg) +{ + int i; + check_indexes_arg *arguments = (check_indexes_arg *) arg; + int n_indexes = 0; + + if (arguments->index_list) + n_indexes = parray_num(arguments->index_list); + + for (i = 0; i < n_indexes; i++) + { + pg_indexEntry *ind = (pg_indexEntry *) parray_get(arguments->index_list, i); + + if (!pg_atomic_test_set_flag(&ind->lock)) + continue; + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "Thread [%d]: interrupted during checkdb --amcheck", + arguments->thread_num); + + if (progress) + elog(INFO, "Thread [%d]. Progress: (%d/%d). Amchecking index '%s.%s'", + arguments->thread_num, i + 1, n_indexes, + ind->namespace, ind->name); + + if (arguments->conn_arg.conn == NULL) + { + + arguments->conn_arg.conn = pgut_connect(arguments->conn_opt.pghost, + arguments->conn_opt.pgport, + arguments->conn_opt.pgdatabase, + arguments->conn_opt.pguser); + arguments->conn_arg.cancel_conn = PQgetCancel(arguments->conn_arg.conn); + } + + /* remember that we have a failed check */ + if (!amcheck_one_index(arguments, ind)) + arguments->ret = 2; /* corruption found */ + } + + /* Close connection. */ + if (arguments->conn_arg.conn) + pgut_disconnect(arguments->conn_arg.conn); + + /* Ret values: + * 0 everything is ok + * 1 thread errored during execution, e.g. interruption (default value) + * 2 corruption is definitely(!) 
found + */ + if (arguments->ret == 1) + arguments->ret = 0; + + return NULL; +} + +/* Get index list for given database */ +static parray* +get_index_list(const char *dbname, bool first_db_with_amcheck, + PGconn *db_conn) +{ + PGresult *res; + char *amcheck_nspname = NULL; + int i; + bool heapallindexed_is_supported = false; + parray *index_list = NULL; + + res = pgut_execute(db_conn, "SELECT " + "extname, nspname, extversion " + "FROM pg_namespace n " + "JOIN pg_extension e " + "ON n.oid=e.extnamespace " + "WHERE e.extname IN ('amcheck', 'amcheck_next') " + "ORDER BY extversion DESC " + "LIMIT 1", + 0, NULL); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + PQclear(res); + elog(ERROR, "Cannot check if amcheck is installed in database %s: %s", + dbname, PQerrorMessage(db_conn)); + } + + if (PQntuples(res) < 1) + { + elog(WARNING, "Extension 'amcheck' or 'amcheck_next' are " + "not installed in database %s", dbname); + return NULL; + } + + amcheck_nspname = pgut_malloc(strlen(PQgetvalue(res, 0, 1)) + 1); + strcpy(amcheck_nspname, PQgetvalue(res, 0, 1)); + + /* heapallindexed_is_supported is database specific */ + if (strcmp(PQgetvalue(res, 0, 2), "1.0") != 0 && + strcmp(PQgetvalue(res, 0, 2), "1") != 0) + heapallindexed_is_supported = true; + + elog(INFO, "Amchecking database '%s' using extension '%s' " + "version %s from schema '%s'", + dbname, PQgetvalue(res, 0, 0), + PQgetvalue(res, 0, 2), PQgetvalue(res, 0, 1)); + + if (!heapallindexed_is_supported && heapallindexed) + elog(WARNING, "Extension '%s' version %s in schema '%s'" + "do not support 'heapallindexed' option", + PQgetvalue(res, 0, 0), PQgetvalue(res, 0, 2), + PQgetvalue(res, 0, 1)); + + /* + * In order to avoid duplicates, select global indexes + * (tablespace pg_global with oid 1664) only once. + * + * select only persistent btree indexes. 
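Aside: check_files() and check_indexes() above distribute work to threads the same way: every item embeds a pg_atomic_flag, and a worker claims an item only if it wins pg_atomic_test_set_flag(). A stripped-down sketch of that pattern; work_item and process_item are hypothetical names, the atomics and parray calls are the ones used in this patch:

```c
/* Illustrative item type: anything with an embedded claim flag works.
 * (work_item and process_item are hypothetical, not from this patch.) */
typedef struct
{
    volatile pg_atomic_flag lock;   /* initialized with pg_atomic_init_flag() */
    /* ... payload ... */
} work_item;

static void *
worker(void *arg)
{
    parray *items = (parray *) arg;
    int     i;

    for (i = 0; i < parray_num(items); i++)
    {
        work_item *item = (work_item *) parray_get(items, i);

        /* the first thread to set the flag owns the item; others skip it */
        if (!pg_atomic_test_set_flag(&item->lock))
            continue;

        process_item(item);     /* hypothetical per-item work */
    }
    return NULL;
}
```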
+ */ + if (first_db_with_amcheck) + { + + res = pgut_execute(db_conn, "SELECT cls.oid, cls.relname, nmspc.nspname " + "FROM pg_catalog.pg_index idx " + "LEFT JOIN pg_catalog.pg_class cls ON idx.indexrelid=cls.oid " + "LEFT JOIN pg_catalog.pg_namespace nmspc ON cls.relnamespace=nmspc.oid " + "LEFT JOIN pg_catalog.pg_am am ON cls.relam=am.oid " + "WHERE am.amname='btree' AND cls.relpersistence != 't' " + "ORDER BY nmspc.nspname DESC", + 0, NULL); + } + else + { + + res = pgut_execute(db_conn, "SELECT cls.oid, cls.relname, nmspc.nspname " + "FROM pg_catalog.pg_index idx " + "LEFT JOIN pg_catalog.pg_class cls ON idx.indexrelid=cls.oid " + "LEFT JOIN pg_catalog.pg_namespace nmspc ON cls.relnamespace=nmspc.oid " + "LEFT JOIN pg_catalog.pg_am am ON cls.relam=am.oid " + "WHERE am.amname='btree' AND cls.relpersistence != 't' AND " + "(cls.reltablespace IN " + "(SELECT oid from pg_catalog.pg_tablespace where spcname <> 'pg_global') " + "OR cls.reltablespace = 0) " + "ORDER BY nmspc.nspname DESC", + 0, NULL); + } + + /* add info needed to check indexes into index_list */ + for (i = 0; i < PQntuples(res); i++) + { + pg_indexEntry *ind = (pg_indexEntry *) pgut_malloc(sizeof(pg_indexEntry)); + char *name = NULL; + char *namespace = NULL; + + /* index oid */ + ind->indexrelid = atoi(PQgetvalue(res, i, 0)); + + /* index relname */ + name = PQgetvalue(res, i, 1); + ind->name = pgut_malloc(strlen(name) + 1); + strcpy(ind->name, name); /* enough buffer size guaranteed */ + + /* index namespace */ + namespace = PQgetvalue(res, i, 2); + ind->namespace = pgut_malloc(strlen(namespace) + 1); + strcpy(ind->namespace, namespace); /* enough buffer size guaranteed */ + + ind->heapallindexed_is_supported = heapallindexed_is_supported; + ind->amcheck_nspname = pgut_malloc(strlen(amcheck_nspname) + 1); + strcpy(ind->amcheck_nspname, amcheck_nspname); + pg_atomic_clear_flag(&ind->lock); + + if (index_list == NULL) + index_list = parray_new(); + + parray_append(index_list, ind); + } + + PQclear(res); + + return index_list; +} + +/* check one index. Return true if everything is ok, false otherwise. */ +static bool +amcheck_one_index(check_indexes_arg *arguments, + pg_indexEntry *ind) +{ + PGresult *res; + char *params[2]; + char *query = NULL; + + params[0] = palloc(64); + + /* first argument is index oid */ + sprintf(params[0], "%i", ind->indexrelid); + /* second argument is heapallindexed */ + params[1] = heapallindexed ? "true" : "false"; + + if (interrupted) + elog(ERROR, "Interrupted"); + + if (ind->heapallindexed_is_supported) + { + query = palloc(strlen(ind->amcheck_nspname)+strlen("SELECT .bt_index_check($1, $2)")+1); + sprintf(query, "SELECT %s.bt_index_check($1, $2)", ind->amcheck_nspname); + + res = pgut_execute_parallel(arguments->conn_arg.conn, + arguments->conn_arg.cancel_conn, + query, 2, (const char **)params, true, true, true); + } + else + { + query = palloc(strlen(ind->amcheck_nspname)+strlen("SELECT .bt_index_check($1)")+1); + sprintf(query, "SELECT %s.bt_index_check($1)", ind->amcheck_nspname); + + res = pgut_execute_parallel(arguments->conn_arg.conn, + arguments->conn_arg.cancel_conn, + query, 1, (const char **)params, true, true, true); + } + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + elog(WARNING, "Thread [%d]. Amcheck failed in database '%s' for index: '%s.%s': %s", + arguments->thread_num, arguments->conn_opt.pgdatabase, + ind->namespace, ind->name, PQresultErrorMessage(res)); + + pfree(params[0]); + pfree(query); + PQclear(res); + return false; + } + else + elog(LOG, "Thread [%d]. 
Amcheck succeeded in database '%s' for index: '%s.%s'", + arguments->thread_num, + arguments->conn_opt.pgdatabase, ind->namespace, ind->name); + + pfree(params[0]); + pfree(query); + PQclear(res); + return true; +} + +/* + * Entry point of checkdb --amcheck. + * + * Connect to all databases in the cluster + * and get list of persistent indexes, + * then run parallel threads to perform bt_index_check() + * for all indexes from the list. + * + * If amcheck extension is not installed in the database, + * skip this database and report it via warning message. + */ +static void +do_amcheck(ConnectionOptions conn_opt, PGconn *conn) +{ + int i; + /* arrays with meta info for multi threaded amcheck */ + pthread_t *threads; + check_indexes_arg *threads_args; + bool check_isok = true; + PGresult *res_db; + int n_databases = 0; + bool first_db_with_amcheck = true; + bool db_skipped = false; + + elog(INFO, "Start amchecking PostgreSQL instance"); + + res_db = pgut_execute(conn, + "SELECT datname, oid, dattablespace " + "FROM pg_database " + "WHERE datname NOT IN ('template0', 'template1')", + 0, NULL); + + /* we don't need this connection anymore */ + if (conn) + pgut_disconnect(conn); + + n_databases = PQntuples(res_db); + + /* For each database check indexes. In parallel. */ + for(i = 0; i < n_databases; i++) + { + int j; + const char *dbname; + PGconn *db_conn = NULL; + parray *index_list = NULL; + + dbname = PQgetvalue(res_db, i, 0); + db_conn = pgut_connect(conn_opt.pghost, conn_opt.pgport, + dbname, conn_opt.pguser); + + index_list = get_index_list(dbname, first_db_with_amcheck, + db_conn); + + /* we don't need this connection anymore */ + if (db_conn) + pgut_disconnect(db_conn); + + if (index_list == NULL) + { + db_skipped = true; + continue; + } + + first_db_with_amcheck = false; + + /* init thread args with own index lists */ + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (check_indexes_arg *) palloc(sizeof(check_indexes_arg)*num_threads); + + for (j = 0; j < num_threads; j++) + { + check_indexes_arg *arg = &(threads_args[j]); + + arg->index_list = index_list; + arg->conn_arg.conn = NULL; + arg->conn_arg.cancel_conn = NULL; + + arg->conn_opt.pghost = conn_opt.pghost; + arg->conn_opt.pgport = conn_opt.pgport; + arg->conn_opt.pgdatabase = dbname; + arg->conn_opt.pguser = conn_opt.pguser; + + arg->thread_num = j + 1; + /* By default there are some error */ + arg->ret = 1; + } + + /* Run threads */ + for (j = 0; j < num_threads; j++) + { + check_indexes_arg *arg = &(threads_args[j]); + elog(VERBOSE, "Start thread num: %i", j); + pthread_create(&threads[j], NULL, check_indexes, arg); + } + + /* Wait threads */ + for (j = 0; j < num_threads; j++) + { + pthread_join(threads[j], NULL); + if (threads_args[j].ret > 0) + check_isok = false; + } + + if (check_isok) + elog(INFO, "Amcheck succeeded for database '%s'", dbname); + else + elog(WARNING, "Amcheck failed for database '%s'", dbname); + + parray_walk(index_list, pg_indexEntry_free); + parray_free(index_list); + + if (interrupted) + break; + } + + /* cleanup */ + PQclear(res_db); + + /* Inform user about amcheck results */ + if (interrupted) + elog(ERROR, "checkdb --amcheck is interrupted."); + + if (check_isok) + { + elog(INFO, "checkdb --amcheck finished successfully. " + "All checked indexes are valid."); + + if (db_skipped) + elog(ERROR, "Some databases were not amchecked."); + else + elog(INFO, "All databases were amchecked."); + } + else + elog(ERROR, "checkdb --amcheck finished with failure. 
" + "Not all checked indexes are valid. %s", + db_skipped?"Some databases were not amchecked.": + "All databases were amchecked."); +} + +/* Entry point of pg_probackup CHECKDB subcommand */ +void +do_checkdb(bool need_amcheck, + ConnectionOptions conn_opt, char *pgdata) +{ + PGNodeInfo nodeInfo; + PGconn *cur_conn; + + /* Initialize PGInfonode */ + pgNodeInit(&nodeInfo); + + if (skip_block_validation && !need_amcheck) + elog(ERROR, "Option '--skip-block-validation' must be used with '--amcheck' option"); + + if (!skip_block_validation) + { + if (!pgdata) + elog(ERROR, "required parameter not specified: PGDATA " + "(-D, --pgdata)"); + + /* get node info */ + cur_conn = pgdata_basic_setup(conn_opt, &nodeInfo); + + /* ensure that conn credentials and pgdata are consistent */ + check_system_identifiers(cur_conn, pgdata); + + /* + * we don't need this connection anymore. + * block validation can last long time, + * so we don't hold the connection open, + * rather open new connection for amcheck + */ + if (cur_conn) + pgut_disconnect(cur_conn); + + do_block_validation(pgdata, nodeInfo.checksum_version); + } + + if (need_amcheck) + { + cur_conn = pgdata_basic_setup(conn_opt, &nodeInfo); + do_amcheck(conn_opt, cur_conn); + } +} diff --git a/src/configure.c b/src/configure.c index bb67ec5fd..1aae3df13 100644 --- a/src/configure.c +++ b/src/configure.c @@ -2,288 +2,678 @@ * * configure.c: - manage backup catalog. * - * Copyright (c) 2017-2018, Postgres Professional + * Copyright (c) 2017-2019, Postgres Professional * *------------------------------------------------------------------------- */ #include "pg_probackup.h" -#include "utils/logger.h" -#include "pqexpbuffer.h" +#include +#include "utils/configuration.h" #include "utils/json.h" -static void opt_log_level_console(pgut_option *opt, const char *arg); -static void opt_log_level_file(pgut_option *opt, const char *arg); -static void opt_compress_alg(pgut_option *opt, const char *arg); +static void assign_log_level_console(ConfigOption *opt, const char *arg); +static void assign_log_level_file(ConfigOption *opt, const char *arg); +static void assign_compress_alg(ConfigOption *opt, const char *arg); + +static char *get_log_level_console(ConfigOption *opt); +static char *get_log_level_file(ConfigOption *opt); +static char *get_compress_alg(ConfigOption *opt); static void show_configure_start(void); static void show_configure_end(void); -static void show_configure(pgBackupConfig *config); -static void show_configure_json(pgBackupConfig *config); +static void show_configure_plain(ConfigOption *opt); +static void show_configure_json(ConfigOption *opt); + +#define RETENTION_REDUNDANCY_DEFAULT 0 +#define RETENTION_WINDOW_DEFAULT 0 -static pgBackupConfig *cur_config = NULL; +#define OPTION_INSTANCE_GROUP "Backup instance information" +#define OPTION_CONN_GROUP "Connection parameters" +#define OPTION_REPLICA_GROUP "Replica parameters" +#define OPTION_ARCHIVE_GROUP "Archive parameters" +#define OPTION_LOG_GROUP "Logging parameters" +#define OPTION_RETENTION_GROUP "Retention parameters" +#define OPTION_COMPRESS_GROUP "Compression parameters" +#define OPTION_REMOTE_GROUP "Remote access parameters" + +/* + * Short name should be non-printable ASCII character. 
+ */ +ConfigOption instance_options[] = +{ + /* Instance options */ + { + 's', 'D', "pgdata", + &instance_config.pgdata, SOURCE_CMD, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + { + 'U', 200, "system-identifier", + &instance_config.system_identifier, SOURCE_FILE_STRICT, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, +#if PG_VERSION_NUM >= 110000 + { + 'u', 201, "xlog-seg-size", + &instance_config.xlog_seg_size, SOURCE_FILE_STRICT, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, +#endif + { + 's', 'E', "external-dirs", + &instance_config.external_dir_str, SOURCE_CMD, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + /* Connection options */ + { + 's', 'd', "pgdatabase", + &instance_config.conn_opt.pgdatabase, SOURCE_CMD, 0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'h', "pghost", + &instance_config.conn_opt.pghost, SOURCE_CMD, 0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'p', "pgport", + &instance_config.conn_opt.pgport, SOURCE_CMD, 0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'U', "pguser", + &instance_config.conn_opt.pguser, SOURCE_CMD, 0, + OPTION_CONN_GROUP, 0, option_get_value + }, + /* Replica options */ + { + 's', 202, "master-db", + &instance_config.master_conn_opt.pgdatabase, SOURCE_CMD, 0, + OPTION_REPLICA_GROUP, 0, option_get_value + }, + { + 's', 203, "master-host", + &instance_config.master_conn_opt.pghost, SOURCE_CMD, 0, + OPTION_REPLICA_GROUP, 0, option_get_value + }, + { + 's', 204, "master-port", + &instance_config.master_conn_opt.pgport, SOURCE_CMD, 0, + OPTION_REPLICA_GROUP, 0, option_get_value + }, + { + 's', 205, "master-user", + &instance_config.master_conn_opt.pguser, SOURCE_CMD, 0, + OPTION_REPLICA_GROUP, 0, option_get_value + }, + { + 'u', 206, "replica-timeout", + &instance_config.replica_timeout, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_REPLICA_GROUP, OPTION_UNIT_S, option_get_value + }, + /* Archive options */ + { + 'u', 207, "archive-timeout", + &instance_config.archive_timeout, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_ARCHIVE_GROUP, OPTION_UNIT_S, option_get_value + }, + { + 's', 208, "archive-host", + &instance_config.archive.host, SOURCE_CMD, 0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 209, "archive-port", + &instance_config.archive.port, SOURCE_CMD, 0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 210, "archive-user", + &instance_config.archive.user, SOURCE_CMD, 0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 211, "restore-command", + &instance_config.restore_command, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + /* Logging options */ + { + 'f', 212, "log-level-console", + assign_log_level_console, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, get_log_level_console + }, + { + 'f', 213, "log-level-file", + assign_log_level_file, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, get_log_level_file + }, + { + 's', 214, "log-filename", + &instance_config.logger.log_filename, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 215, "error-log-filename", + &instance_config.logger.error_log_filename, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 216, "log-directory", + &instance_config.logger.log_directory, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 'U', 217, "log-rotation-size", + &instance_config.logger.log_rotation_size, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_LOG_GROUP, OPTION_UNIT_KB, option_get_value + }, + { + 'U', 218, "log-rotation-age", + 
&instance_config.logger.log_rotation_age, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_LOG_GROUP, OPTION_UNIT_MS, option_get_value + }, + /* Retention options */ + { + 'u', 219, "retention-redundancy", + &instance_config.retention_redundancy, SOURCE_CMD, 0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + { + 'u', 220, "retention-window", + &instance_config.retention_window, SOURCE_CMD, 0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + { + 'u', 221, "wal-depth", + &instance_config.wal_depth, SOURCE_CMD, 0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + /* Compression options */ + { + 'f', 222, "compress-algorithm", + assign_compress_alg, SOURCE_CMD, 0, + OPTION_COMPRESS_GROUP, 0, get_compress_alg + }, + { + 'u', 223, "compress-level", + &instance_config.compress_level, SOURCE_CMD, 0, + OPTION_COMPRESS_GROUP, 0, option_get_value + }, + /* Remote backup options */ + { + 's', 224, "remote-proto", + &instance_config.remote.proto, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 225, "remote-host", + &instance_config.remote.host, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 226, "remote-port", + &instance_config.remote.port, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 227, "remote-path", + &instance_config.remote.path, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 228, "remote-user", + &instance_config.remote.user, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 229, "ssh-options", + &instance_config.remote.ssh_options, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 230, "ssh-config", + &instance_config.remote.ssh_config, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { 0 } +}; + +/* An instance configuration with default options */ +InstanceConfig instance_config; static PQExpBufferData show_buf; static int32 json_level = 0; +static const char *current_group = NULL; /* - * All this code needs refactoring. + * Show configure options including default values. */ +void +do_show_config(void) +{ + int i; + + show_configure_start(); -/* Set configure options */ -int -do_configure(bool show_only) + for (i = 0; instance_options[i].type; i++) + { + if (show_format == SHOW_PLAIN) + show_configure_plain(&instance_options[i]); + else + show_configure_json(&instance_options[i]); + } + + show_configure_end(); +} + +/* + * Save configure options into BACKUP_CATALOG_CONF_FILE. Do not save default + * values into the file. 
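Aside: the do_set_config() implementation below writes a grouped key = value file, printing each group header once and single-quoting values that contain spaces. Purely for orientation, a hypothetical excerpt of the resulting pg_probackup.conf (all values made up):

```c
/* Hypothetical excerpt of pg_probackup.conf as written by do_set_config();
 * group headers come from the OPTION_*_GROUP strings defined above.
 *
 *   # Backup instance information
 *   pgdata = /var/lib/postgresql/12/main
 *   system-identifier = 6833740809086248539
 *   # Connection parameters
 *   pgdatabase = backupdb
 *   # Remote access parameters
 *   ssh-options = '-c aes128-ctr'
 */
```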
+ */ +void +do_set_config(bool missing_ok) { - pgBackupConfig *config = readBackupCatalogConfigFile(); - if (pgdata) - config->pgdata = pgdata; - if (pgut_dbname) - config->pgdatabase = pgut_dbname; - if (host) - config->pghost = host; - if (port) - config->pgport = port; - if (username) - config->pguser = username; - - if (master_host) - config->master_host = master_host; - if (master_port) - config->master_port = master_port; - if (master_db) - config->master_db = master_db; - if (master_user) - config->master_user = master_user; - - if (replica_timeout) - config->replica_timeout = replica_timeout; - - if (archive_timeout) - config->archive_timeout = archive_timeout; + char path[MAXPGPATH]; + char path_temp[MAXPGPATH]; + FILE *fp; + int i; - if (log_level_console) - config->log_level_console = log_level_console; - if (log_level_file) - config->log_level_file = log_level_file; - if (log_filename) - config->log_filename = log_filename; - if (error_log_filename) - config->error_log_filename = error_log_filename; - if (log_directory) - config->log_directory = log_directory; - if (log_rotation_size) - config->log_rotation_size = log_rotation_size; - if (log_rotation_age) - config->log_rotation_age = log_rotation_age; - - if (retention_redundancy) - config->retention_redundancy = retention_redundancy; - if (retention_window) - config->retention_window = retention_window; + join_path_components(path, backup_instance_path, BACKUP_CATALOG_CONF_FILE); + snprintf(path_temp, sizeof(path_temp), "%s.tmp", path); - if (compress_alg) - config->compress_alg = compress_alg; - if (compress_level) - config->compress_level = compress_level; + if (!missing_ok && !fileExists(path, FIO_LOCAL_HOST)) + elog(ERROR, "Configuration file \"%s\" doesn't exist", path); - if (show_only) - show_configure(config); - else - writeBackupCatalogConfigFile(config); + fp = fopen(path_temp, "wt"); + if (fp == NULL) + elog(ERROR, "Cannot create configuration file \"%s\": %s", + BACKUP_CATALOG_CONF_FILE, strerror(errno)); - return 0; + current_group = NULL; + + for (i = 0; instance_options[i].type; i++) + { + ConfigOption *opt = &instance_options[i]; + char *value; + + /* Save only options from command line */ + if (opt->source != SOURCE_CMD && + /* ...or options from the previous configure file */ + opt->source != SOURCE_FILE && opt->source != SOURCE_FILE_STRICT) + continue; + + value = opt->get_value(opt); + if (value == NULL) + continue; + + if (current_group == NULL || strcmp(opt->group, current_group) != 0) + { + current_group = opt->group; + fprintf(fp, "# %s\n", current_group); + } + + if (strchr(value, ' ')) + fprintf(fp, "%s = '%s'\n", opt->lname, value); + else + fprintf(fp, "%s = %s\n", opt->lname, value); + pfree(value); + } + + fclose(fp); + + if (rename(path_temp, path) < 0) + { + int errno_temp = errno; + unlink(path_temp); + elog(ERROR, "Cannot rename configuration file \"%s\" to \"%s\": %s", + path_temp, path, strerror(errno_temp)); + } } void -pgBackupConfigInit(pgBackupConfig *config) +init_config(InstanceConfig *config, const char *instance_name) { - config->system_identifier = 0; - config->pgdata = NULL; - config->pgdatabase = NULL; - config->pghost = NULL; - config->pgport = NULL; - config->pguser = NULL; - - config->master_host = NULL; - config->master_port = NULL; - config->master_db = NULL; - config->master_user = NULL; + MemSet(config, 0, sizeof(InstanceConfig)); + + config->name = pgut_strdup(instance_name); + + /* + * Starting from PostgreSQL 11 WAL segment size may vary. 
Prior to + * PostgreSQL 10 xlog_seg_size is equal to XLOG_SEG_SIZE. + */ +#if PG_VERSION_NUM >= 110000 + config->xlog_seg_size = 0; +#else + config->xlog_seg_size = XLOG_SEG_SIZE; +#endif + config->replica_timeout = REPLICA_TIMEOUT_DEFAULT; config->archive_timeout = ARCHIVE_TIMEOUT_DEFAULT; - config->log_level_console = LOG_LEVEL_CONSOLE_DEFAULT; - config->log_level_file = LOG_LEVEL_FILE_DEFAULT; - config->log_filename = LOG_FILENAME_DEFAULT; - config->error_log_filename = NULL; - config->log_directory = LOG_DIRECTORY_DEFAULT; - config->log_rotation_size = LOG_ROTATION_SIZE_DEFAULT; - config->log_rotation_age = LOG_ROTATION_AGE_DEFAULT; + /* Copy logger defaults */ + config->logger = logger_config; config->retention_redundancy = RETENTION_REDUNDANCY_DEFAULT; config->retention_window = RETENTION_WINDOW_DEFAULT; + config->wal_depth = 0; config->compress_alg = COMPRESS_ALG_DEFAULT; config->compress_level = COMPRESS_LEVEL_DEFAULT; + + config->remote.proto = (char*)"ssh"; } -void -writeBackupCatalogConfig(FILE *out, pgBackupConfig *config) +/* + * read instance config from file + */ +InstanceConfig * +readInstanceConfigFile(const char *instance_name) { - uint64 res; - const char *unit; - - fprintf(out, "#Backup instance info\n"); - fprintf(out, "PGDATA = %s\n", config->pgdata); - fprintf(out, "system-identifier = " UINT64_FORMAT "\n", config->system_identifier); - - fprintf(out, "#Connection parameters:\n"); - if (config->pgdatabase) - fprintf(out, "PGDATABASE = %s\n", config->pgdatabase); - if (config->pghost) - fprintf(out, "PGHOST = %s\n", config->pghost); - if (config->pgport) - fprintf(out, "PGPORT = %s\n", config->pgport); - if (config->pguser) - fprintf(out, "PGUSER = %s\n", config->pguser); - - fprintf(out, "#Replica parameters:\n"); - if (config->master_host) - fprintf(out, "master-host = %s\n", config->master_host); - if (config->master_port) - fprintf(out, "master-port = %s\n", config->master_port); - if (config->master_db) - fprintf(out, "master-db = %s\n", config->master_db); - if (config->master_user) - fprintf(out, "master-user = %s\n", config->master_user); - - convert_from_base_unit_u(config->replica_timeout, OPTION_UNIT_S, - &res, &unit); - fprintf(out, "replica-timeout = " UINT64_FORMAT "%s\n", res, unit); - - fprintf(out, "#Archive parameters:\n"); - convert_from_base_unit_u(config->archive_timeout, OPTION_UNIT_S, - &res, &unit); - fprintf(out, "archive-timeout = " UINT64_FORMAT "%s\n", res, unit); - - fprintf(out, "#Logging parameters:\n"); - fprintf(out, "log-level-console = %s\n", deparse_log_level(config->log_level_console)); - fprintf(out, "log-level-file = %s\n", deparse_log_level(config->log_level_file)); - fprintf(out, "log-filename = %s\n", config->log_filename); - if (config->error_log_filename) - fprintf(out, "error-log-filename = %s\n", config->error_log_filename); - - if (strcmp(config->log_directory, LOG_DIRECTORY_DEFAULT) == 0) - fprintf(out, "log-directory = %s/%s\n", backup_path, config->log_directory); - else - fprintf(out, "log-directory = %s\n", config->log_directory); - /* Convert values from base unit */ - convert_from_base_unit_u(config->log_rotation_size, OPTION_UNIT_KB, - &res, &unit); - fprintf(out, "log-rotation-size = " UINT64_FORMAT "%s\n", res, (res)?unit:"KB"); + char path[MAXPGPATH]; + InstanceConfig *instance = pgut_new(InstanceConfig); + char *log_level_console = NULL; + char *log_level_file = NULL; + char *compress_alg = NULL; + int parsed_options; + + ConfigOption instance_options[] = + { + /* Instance options */ + { + 's', 'D', "pgdata", 
+ &instance->pgdata, SOURCE_CMD, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + { + 'U', 200, "system-identifier", + &instance->system_identifier, SOURCE_FILE_STRICT, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + #if PG_VERSION_NUM >= 110000 + { + 'u', 201, "xlog-seg-size", + &instance->xlog_seg_size, SOURCE_FILE_STRICT, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + #endif + { + 's', 'E', "external-dirs", + &instance->external_dir_str, SOURCE_CMD, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + /* Connection options */ + { + 's', 'd', "pgdatabase", + &instance->conn_opt.pgdatabase, SOURCE_CMD, 0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'h', "pghost", + &instance->conn_opt.pghost, SOURCE_CMD, 0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'p', "pgport", + &instance->conn_opt.pgport, SOURCE_CMD, 0, + OPTION_CONN_GROUP, 0, option_get_value + }, + { + 's', 'U', "pguser", + &instance->conn_opt.pguser, SOURCE_CMD, 0, + OPTION_CONN_GROUP, 0, option_get_value + }, + /* Replica options */ + { + 's', 202, "master-db", + &instance->master_conn_opt.pgdatabase, SOURCE_CMD, 0, + OPTION_REPLICA_GROUP, 0, option_get_value + }, + { + 's', 203, "master-host", + &instance->master_conn_opt.pghost, SOURCE_CMD, 0, + OPTION_REPLICA_GROUP, 0, option_get_value + }, + { + 's', 204, "master-port", + &instance->master_conn_opt.pgport, SOURCE_CMD, 0, + OPTION_REPLICA_GROUP, 0, option_get_value + }, + { + 's', 205, "master-user", + &instance->master_conn_opt.pguser, SOURCE_CMD, 0, + OPTION_REPLICA_GROUP, 0, option_get_value + }, + { + 'u', 206, "replica-timeout", + &instance->replica_timeout, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_REPLICA_GROUP, OPTION_UNIT_S, option_get_value + }, + /* Archive options */ + { + 'u', 207, "archive-timeout", + &instance->archive_timeout, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_ARCHIVE_GROUP, OPTION_UNIT_S, option_get_value + }, + { + 's', 208, "archive-host", + &instance_config.archive.host, SOURCE_CMD, 0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 209, "archive-port", + &instance_config.archive.port, SOURCE_CMD, 0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 210, "archive-user", + &instance_config.archive.user, SOURCE_CMD, 0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + { + 's', 211, "restore-command", + &instance->restore_command, SOURCE_CMD, 0, + OPTION_ARCHIVE_GROUP, 0, option_get_value + }, + + /* Instance options */ + { + 's', 'D', "pgdata", + &instance->pgdata, SOURCE_CMD, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, + + /* Logging options */ + { + 's', 212, "log-level-console", + &log_level_console, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 213, "log-level-file", + &log_level_file, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 214, "log-filename", + &instance->logger.log_filename, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 215, "error-log-filename", + &instance->logger.error_log_filename, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 's', 216, "log-directory", + &instance->logger.log_directory, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 'U', 217, "log-rotation-size", + &instance->logger.log_rotation_size, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_LOG_GROUP, OPTION_UNIT_KB, option_get_value + }, + { + 'U', 218, "log-rotation-age", + &instance->logger.log_rotation_age, SOURCE_CMD, SOURCE_DEFAULT, + OPTION_LOG_GROUP, OPTION_UNIT_MS, option_get_value + }, + /* 
Retention options */ + { + 'u', 219, "retention-redundancy", + &instance->retention_redundancy, SOURCE_CMD, 0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + { + 'u', 220, "retention-window", + &instance->retention_window, SOURCE_CMD, 0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + { + 'u', 221, "wal-depth", + &instance->wal_depth, SOURCE_CMD, 0, + OPTION_RETENTION_GROUP, 0, option_get_value + }, + /* Compression options */ + { + 's', 222, "compress-algorithm", + &compress_alg, SOURCE_CMD, 0, + OPTION_LOG_GROUP, 0, option_get_value + }, + { + 'u', 223, "compress-level", + &instance->compress_level, SOURCE_CMD, 0, + OPTION_COMPRESS_GROUP, 0, option_get_value + }, + /* Remote backup options */ + { + 's', 224, "remote-proto", + &instance->remote.proto, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 225, "remote-host", + &instance->remote.host, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 226, "remote-port", + &instance->remote.port, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 227, "remote-path", + &instance->remote.path, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 228, "remote-user", + &instance->remote.user, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 229, "ssh-options", + &instance->remote.ssh_options, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { + 's', 230, "ssh-config", + &instance->remote.ssh_config, SOURCE_CMD, 0, + OPTION_REMOTE_GROUP, 0, option_get_value + }, + { 0 } + }; - convert_from_base_unit_u(config->log_rotation_age, OPTION_UNIT_S, - &res, &unit); - fprintf(out, "log-rotation-age = " UINT64_FORMAT "%s\n", res, (res)?unit:"min"); - fprintf(out, "#Retention parameters:\n"); - fprintf(out, "retention-redundancy = %u\n", config->retention_redundancy); - fprintf(out, "retention-window = %u\n", config->retention_window); + init_config(instance, instance_name); - fprintf(out, "#Compression parameters:\n"); + sprintf(instance->backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); + canonicalize_path(instance->backup_instance_path); - fprintf(out, "compress-algorithm = %s\n", deparse_compress_alg(config->compress_alg)); - fprintf(out, "compress-level = %d\n", config->compress_level); -} + sprintf(instance->arclog_path, "%s/%s/%s", + backup_path, "wal", instance_name); + canonicalize_path(instance->arclog_path); -void -writeBackupCatalogConfigFile(pgBackupConfig *config) -{ - char path[MAXPGPATH]; - FILE *fp; + join_path_components(path, instance->backup_instance_path, + BACKUP_CATALOG_CONF_FILE); - join_path_components(path, backup_instance_path, BACKUP_CATALOG_CONF_FILE); - fp = fopen(path, "wt"); - if (fp == NULL) - elog(ERROR, "cannot create %s: %s", - BACKUP_CATALOG_CONF_FILE, strerror(errno)); + if (fio_access(path, F_OK, FIO_BACKUP_HOST) != 0) + { + elog(WARNING, "Control file \"%s\" doesn't exist", path); + pfree(instance); + return NULL; + } - writeBackupCatalogConfig(fp, config); - fclose(fp); -} + parsed_options = config_read_opt(path, instance_options, WARNING, true, true); + if (parsed_options == 0) + { + elog(WARNING, "Control file \"%s\" is empty", path); + pfree(instance); + return NULL; + } -pgBackupConfig* -readBackupCatalogConfigFile(void) -{ - pgBackupConfig *config = pgut_new(pgBackupConfig); - char path[MAXPGPATH]; + if (log_level_console) + instance->logger.log_level_console = parse_log_level(log_level_console); - pgut_option options[] = - { - /* retention 
options */ - { 'u', 0, "retention-redundancy", &(config->retention_redundancy),SOURCE_FILE_STRICT }, - { 'u', 0, "retention-window", &(config->retention_window), SOURCE_FILE_STRICT }, - /* compression options */ - { 'f', 0, "compress-algorithm", opt_compress_alg, SOURCE_CMDLINE }, - { 'u', 0, "compress-level", &(config->compress_level), SOURCE_CMDLINE }, - /* logging options */ - { 'f', 0, "log-level-console", opt_log_level_console, SOURCE_CMDLINE }, - { 'f', 0, "log-level-file", opt_log_level_file, SOURCE_CMDLINE }, - { 's', 0, "log-filename", &(config->log_filename), SOURCE_CMDLINE }, - { 's', 0, "error-log-filename", &(config->error_log_filename), SOURCE_CMDLINE }, - { 's', 0, "log-directory", &(config->log_directory), SOURCE_CMDLINE }, - { 'u', 0, "log-rotation-size", &(config->log_rotation_size), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_KB }, - { 'u', 0, "log-rotation-age", &(config->log_rotation_age), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_S }, - /* connection options */ - { 's', 0, "pgdata", &(config->pgdata), SOURCE_FILE_STRICT }, - { 's', 0, "pgdatabase", &(config->pgdatabase), SOURCE_FILE_STRICT }, - { 's', 0, "pghost", &(config->pghost), SOURCE_FILE_STRICT }, - { 's', 0, "pgport", &(config->pgport), SOURCE_FILE_STRICT }, - { 's', 0, "pguser", &(config->pguser), SOURCE_FILE_STRICT }, - /* replica options */ - { 's', 0, "master-host", &(config->master_host), SOURCE_FILE_STRICT }, - { 's', 0, "master-port", &(config->master_port), SOURCE_FILE_STRICT }, - { 's', 0, "master-db", &(config->master_db), SOURCE_FILE_STRICT }, - { 's', 0, "master-user", &(config->master_user), SOURCE_FILE_STRICT }, - { 'u', 0, "replica-timeout", &(config->replica_timeout), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_S }, - /* other options */ - { 'U', 0, "system-identifier", &(config->system_identifier), SOURCE_FILE_STRICT }, - /* archive options */ - { 'u', 0, "archive-timeout", &(config->archive_timeout), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_S }, - {0} - }; + if (log_level_file) + instance->logger.log_level_file = parse_log_level(log_level_file); - cur_config = config; + if (compress_alg) + instance->compress_alg = parse_compress_alg(compress_alg); - join_path_components(path, backup_instance_path, BACKUP_CATALOG_CONF_FILE); +#if PG_VERSION_NUM >= 110000 + /* If for some reason xlog-seg-size is missing, then set it to 16MB */ + if (!instance->xlog_seg_size) + instance->xlog_seg_size = DEFAULT_XLOG_SEG_SIZE; +#endif - pgBackupConfigInit(config); - pgut_readopt(path, options, ERROR); + return instance; - return config; } static void -opt_log_level_console(pgut_option *opt, const char *arg) +assign_log_level_console(ConfigOption *opt, const char *arg) { - cur_config->log_level_console = parse_log_level(arg); + instance_config.logger.log_level_console = parse_log_level(arg); } static void -opt_log_level_file(pgut_option *opt, const char *arg) +assign_log_level_file(ConfigOption *opt, const char *arg) { - cur_config->log_level_file = parse_log_level(arg); + instance_config.logger.log_level_file = parse_log_level(arg); } static void -opt_compress_alg(pgut_option *opt, const char *arg) +assign_compress_alg(ConfigOption *opt, const char *arg) +{ + instance_config.compress_alg = parse_compress_alg(arg); +} + +static char * +get_log_level_console(ConfigOption *opt) +{ + return pstrdup(deparse_log_level(instance_config.logger.log_level_console)); +} + +static char * +get_log_level_file(ConfigOption *opt) +{ + return pstrdup(deparse_log_level(instance_config.logger.log_level_file)); +} + +static char 
* +get_compress_alg(ConfigOption *opt) { - cur_config->compress_alg = parse_compress_alg(arg); + return pstrdup(deparse_compress_alg(instance_config.compress_alg)); } /* @@ -292,12 +682,15 @@ opt_compress_alg(pgut_option *opt, const char *arg) static void show_configure_start(void) { - if (show_format == SHOW_PLAIN) - return; - - /* For now we need buffer only for JSON format */ - json_level = 0; initPQExpBuffer(&show_buf); + + if (show_format == SHOW_PLAIN) + current_group = NULL; + else + { + json_level = 0; + json_add(&show_buf, JT_BEGIN_OBJECT, &json_level); + } } /* @@ -307,28 +700,38 @@ static void show_configure_end(void) { if (show_format == SHOW_PLAIN) - return; + current_group = NULL; else + { + json_add(&show_buf, JT_END_OBJECT, &json_level); appendPQExpBufferChar(&show_buf, '\n'); + } fputs(show_buf.data, stdout); termPQExpBuffer(&show_buf); } /* - * Show configure information of pg_probackup. + * Plain output. */ + static void -show_configure(pgBackupConfig *config) +show_configure_plain(ConfigOption *opt) { - show_configure_start(); + char *value; - if (show_format == SHOW_PLAIN) - writeBackupCatalogConfig(stdout, config); - else - show_configure_json(config); + value = opt->get_value(opt); + if (value == NULL) + return; - show_configure_end(); + if (current_group == NULL || strcmp(opt->group, current_group) != 0) + { + current_group = opt->group; + appendPQExpBuffer(&show_buf, "# %s\n", current_group); + } + + appendPQExpBuffer(&show_buf, "%s = %s\n", opt->lname, value); + pfree(value); } /* @@ -336,104 +739,15 @@ show_configure(pgBackupConfig *config) */ static void -show_configure_json(pgBackupConfig *config) +show_configure_json(ConfigOption *opt) { - PQExpBuffer buf = &show_buf; - uint64 res; - const char *unit; - - json_add(buf, JT_BEGIN_OBJECT, &json_level); - - json_add_value(buf, "pgdata", config->pgdata, json_level, false); - - json_add_key(buf, "system-identifier", json_level, true); - appendPQExpBuffer(buf, UINT64_FORMAT, config->system_identifier); - - /* Connection parameters */ - if (config->pgdatabase) - json_add_value(buf, "pgdatabase", config->pgdatabase, json_level, true); - if (config->pghost) - json_add_value(buf, "pghost", config->pghost, json_level, true); - if (config->pgport) - json_add_value(buf, "pgport", config->pgport, json_level, true); - if (config->pguser) - json_add_value(buf, "pguser", config->pguser, json_level, true); - - /* Replica parameters */ - if (config->master_host) - json_add_value(buf, "master-host", config->master_host, json_level, - true); - if (config->master_port) - json_add_value(buf, "master-port", config->master_port, json_level, - true); - if (config->master_db) - json_add_value(buf, "master-db", config->master_db, json_level, true); - if (config->master_user) - json_add_value(buf, "master-user", config->master_user, json_level, - true); - - json_add_key(buf, "replica-timeout", json_level, true); - convert_from_base_unit_u(config->replica_timeout, OPTION_UNIT_S, - &res, &unit); - appendPQExpBuffer(buf, UINT64_FORMAT "%s", res, unit); - - /* Archive parameters */ - json_add_key(buf, "archive-timeout", json_level, true); - convert_from_base_unit_u(config->archive_timeout, OPTION_UNIT_S, - &res, &unit); - appendPQExpBuffer(buf, UINT64_FORMAT "%s", res, unit); - - /* Logging parameters */ - json_add_value(buf, "log-level-console", - deparse_log_level(config->log_level_console), json_level, - true); - json_add_value(buf, "log-level-file", - deparse_log_level(config->log_level_file), json_level, - true); - json_add_value(buf, 
"log-filename", config->log_filename, json_level, - true); - if (config->error_log_filename) - json_add_value(buf, "error-log-filename", config->error_log_filename, - json_level, true); - - if (strcmp(config->log_directory, LOG_DIRECTORY_DEFAULT) == 0) - { - char log_directory_fullpath[MAXPGPATH]; - - sprintf(log_directory_fullpath, "%s/%s", - backup_path, config->log_directory); + char *value; - json_add_value(buf, "log-directory", log_directory_fullpath, - json_level, true); - } - else - json_add_value(buf, "log-directory", config->log_directory, - json_level, true); - - json_add_key(buf, "log-rotation-size", json_level, true); - convert_from_base_unit_u(config->log_rotation_size, OPTION_UNIT_KB, - &res, &unit); - appendPQExpBuffer(buf, UINT64_FORMAT "%s", res, (res)?unit:"KB"); - - json_add_key(buf, "log-rotation-age", json_level, true); - convert_from_base_unit_u(config->log_rotation_age, OPTION_UNIT_S, - &res, &unit); - appendPQExpBuffer(buf, UINT64_FORMAT "%s", res, (res)?unit:"min"); - - /* Retention parameters */ - json_add_key(buf, "retention-redundancy", json_level, true); - appendPQExpBuffer(buf, "%u", config->retention_redundancy); - - json_add_key(buf, "retention-window", json_level, true); - appendPQExpBuffer(buf, "%u", config->retention_window); + value = opt->get_value(opt); + if (value == NULL) + return; - /* Compression parameters */ - json_add_value(buf, "compress-algorithm", - deparse_compress_alg(config->compress_alg), json_level, + json_add_value(&show_buf, opt->lname, value, json_level, true); - - json_add_key(buf, "compress-level", json_level, true); - appendPQExpBuffer(buf, "%d", config->compress_level); - - json_add(buf, JT_END_OBJECT, &json_level); + pfree(value); } diff --git a/src/data.c b/src/data.c index a66770bcf..fc3107513 100644 --- a/src/data.c +++ b/src/data.c @@ -3,28 +3,37 @@ * data.c: utils to parse and backup data pages * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ #include "pg_probackup.h" -#include -#include -#include -#include - -#include "libpq/pqsignal.h" -#include "storage/block.h" -#include "storage/bufpage.h" +#include "storage/checksum.h" #include "storage/checksum_impl.h" #include +#include "utils/file.h" + +#include +#include #ifdef HAVE_LIBZ #include #endif +#include "utils/thread.h" + +/* Union to ease operations on relation pages */ +typedef struct DataPage +{ + BackupPageHeader bph; + char data[BLCKSZ]; +} DataPage; + +static bool get_page_header(FILE *in, const char *fullpath, BackupPageHeader* bph, + pg_crc32 *crc, bool use_crc32c); + #ifdef HAVE_LIBZ /* Implementation of zlib compression method */ static int32 @@ -53,9 +62,9 @@ zlib_decompress(void *dst, size_t dst_size, void const *src, size_t src_size) * Compresses source into dest using algorithm. Returns the number of bytes * written in the destination buffer, or -1 if compression fails. 
*/ -static int32 +int32 do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, - CompressAlg alg, int level) + CompressAlg alg, int level, const char **errormsg) { switch (alg) { @@ -64,7 +73,13 @@ do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, return -1; #ifdef HAVE_LIBZ case ZLIB_COMPRESS: - return zlib_compress(dst, dst_size, src, src_size, level); + { + int32 ret; + ret = zlib_compress(dst, dst_size, src, src_size, level); + if (ret < Z_OK && errormsg) + *errormsg = zError(ret); + return ret; + } #endif case PGLZ_COMPRESS: return pglz_compress(src, src_size, dst, PGLZ_strategy_always); @@ -77,42 +92,89 @@ do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, * Decompresses source into dest using algorithm. Returns the number of bytes * decompressed in the destination buffer, or -1 if decompression fails. */ -static int32 +int32 do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, - CompressAlg alg) + CompressAlg alg, const char **errormsg) { switch (alg) { case NONE_COMPRESS: case NOT_DEFINED_COMPRESS: + if (errormsg) + *errormsg = "Invalid compression algorithm"; return -1; #ifdef HAVE_LIBZ case ZLIB_COMPRESS: - return zlib_decompress(dst, dst_size, src, src_size); + { + int32 ret; + ret = zlib_decompress(dst, dst_size, src, src_size); + if (ret < Z_OK && errormsg) + *errormsg = zError(ret); + return ret; + } #endif case PGLZ_COMPRESS: + +#if PG_VERSION_NUM >= 120000 + return pglz_decompress(src, src_size, dst, dst_size, true); +#else return pglz_decompress(src, src_size, dst, dst_size); +#endif } return -1; } + +#define ZLIB_MAGIC 0x78 + /* - * When copying datafiles to backup we validate and compress them block - * by block. Thus special header is required for each data block. + * Before version 2.0.23 there was a bug in pg_probackup: pages whose compressed + size is exactly the same as the original size were not treated as compressed. + * This check tries to detect and decompress such pages. + * There is no 100% criterion to determine whether a page is compressed or not, + * but at least we will do this check only for pages which do not pass the validation step. */ -typedef struct BackupPageHeader +static bool +page_may_be_compressed(Page page, CompressAlg alg, uint32 backup_version) { - BlockNumber block; /* block number */ - int32 compressed_size; -} BackupPageHeader; - -/* Special value for compressed_size field */ -#define PageIsTruncated -2 -#define SkipCurrentPage -3 + PageHeader phdr; + + phdr = (PageHeader) page; + + /* First check if page header is valid (it seems to be fast enough check) */ + if (!(PageGetPageSize(phdr) == BLCKSZ && + // PageGetPageLayoutVersion(phdr) == PG_PAGE_LAYOUT_VERSION && + (phdr->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && + phdr->pd_lower >= SizeOfPageHeaderData && + phdr->pd_lower <= phdr->pd_upper && + phdr->pd_upper <= phdr->pd_special && + phdr->pd_special <= BLCKSZ && + phdr->pd_special == MAXALIGN(phdr->pd_special))) + { + /* ... 
end only if it is invalid, then do more checks */ + if (backup_version >= 20023) + { + /* Versions 2.0.23 and higher don't have such bug */ + return false; + } +#ifdef HAVE_LIBZ + /* For zlib we can check page magic: + * https://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like + */ + if (alg == ZLIB_COMPRESS && *(char*)page != ZLIB_MAGIC) + { + return false; + } +#endif + /* otherwise let's try to decompress the page */ + return true; + } + return false; +} /* Verify page's header */ -static bool +bool parse_page(Page page, XLogRecPtr *lsn) { PageHeader phdr = (PageHeader) page; @@ -121,7 +183,7 @@ parse_page(Page page, XLogRecPtr *lsn) *lsn = PageXLogRecPtrGet(phdr->pd_lsn); if (PageGetPageSize(phdr) == BLCKSZ && - PageGetPageLayoutVersion(phdr) == PG_PAGE_LAYOUT_VERSION && + // PageGetPageLayoutVersion(phdr) == PG_PAGE_LAYOUT_VERSION && (phdr->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && phdr->pd_lower >= SizeOfPageHeaderData && phdr->pd_lower <= phdr->pd_upper && @@ -133,95 +195,67 @@ parse_page(Page page, XLogRecPtr *lsn) return false; } -/* Read one page from file directly accessing disk - * return value: - * 0 - if the page is not found - * 1 - if the page is found and valid - * -1 - if the page is found but invalid +/* We know that header is invalid, store specific + * details in errormsg. */ -static int -read_page_from_file(pgFile *file, BlockNumber blknum, - FILE *in, Page page, XLogRecPtr *page_lsn) +void +get_header_errormsg(Page page, char **errormsg) { - off_t offset = blknum * BLCKSZ; - size_t read_len = 0; - - /* read the block */ - if (fseek(in, offset, SEEK_SET) != 0) - elog(ERROR, "File: %s, could not seek to block %u: %s", - file->path, blknum, strerror(errno)); - - read_len = fread(page, 1, BLCKSZ, in); - - if (read_len != BLCKSZ) - { - /* The block could have been truncated. It is fine. */ - if (read_len == 0) - { - elog(LOG, "File %s, block %u, file was truncated", - file->path, blknum); - return 0; - } - else - elog(WARNING, "File: %s, block %u, expected block size %d," - "but read %lu, try again", - file->path, blknum, BLCKSZ, read_len); - } - - /* - * If we found page with invalid header, at first check if it is zeroed, - * which is a valid state for page. If it is not, read it and check header - * again, because it's possible that we've read a partly flushed page. - * If after several attempts page header is still invalid, throw an error. - * The same idea is applied to checksum verification. - */ - if (!parse_page(page, page_lsn)) - { - int i; - /* Check if the page is zeroed. 
*/ - for(i = 0; i < BLCKSZ && page[i] == 0; i++); + PageHeader phdr = (PageHeader) page; + *errormsg = pgut_malloc(ERRMSG_MAX_LEN); + + if (PageGetPageSize(phdr) != BLCKSZ) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "page size %lu is not equal to block size %u", + PageGetPageSize(phdr), BLCKSZ); + + else if (phdr->pd_lower < SizeOfPageHeaderData) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_lower %i is less than page header size %lu", + phdr->pd_lower, SizeOfPageHeaderData); + + else if (phdr->pd_lower > phdr->pd_upper) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_lower %u is greater than pd_upper %u", + phdr->pd_lower, phdr->pd_upper); + + else if (phdr->pd_upper > phdr->pd_special) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_upper %u is greater than pd_special %u", + phdr->pd_upper, phdr->pd_special); + + else if (phdr->pd_special > BLCKSZ) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_special %u is greater than block size %u", + phdr->pd_special, BLCKSZ); + + else if (phdr->pd_special != MAXALIGN(phdr->pd_special)) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_special %i is misaligned, expected %lu", + phdr->pd_special, MAXALIGN(phdr->pd_special)); + + else if (phdr->pd_flags & ~PD_VALID_FLAG_BITS) + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid, " + "pd_flags mask contain illegal bits"); - /* Page is zeroed. No need to check header and checksum. */ - if (i == BLCKSZ) - { - elog(LOG, "File: %s blknum %u, empty page", file->path, blknum); - return 1; - } + else + snprintf(*errormsg, ERRMSG_MAX_LEN, "page header invalid"); +} - /* - * If page is not completely empty and we couldn't parse it, - * try again several times. If it didn't help, throw error - */ - elog(LOG, "File: %s blknum %u have wrong page header, try again", - file->path, blknum); - return -1; - } +/* We know that checksumms are mismatched, store specific + * details in errormsg. + */ +void +get_checksum_errormsg(Page page, char **errormsg, BlockNumber absolute_blkno) +{ + PageHeader phdr = (PageHeader) page; + *errormsg = pgut_malloc(ERRMSG_MAX_LEN); - /* Verify checksum */ - if(current.checksum_version) - { - /* - * If checksum is wrong, sleep a bit and then try again - * several times. If it didn't help, throw error - */ - if (pg_checksum_page(page, file->segno * RELSEG_SIZE + blknum) - != ((PageHeader) page)->pd_checksum) - { - elog(WARNING, "File: %s blknum %u have wrong checksum, try again", - file->path, blknum); - return -1; - } - else - { - /* page header and checksum are correct */ - return 1; - } - } - else - { - /* page header is correct and checksum check is disabled */ - return 1; - } + snprintf(*errormsg, ERRMSG_MAX_LEN, + "page verification failed, " + "calculated checksum %u but expected %u", + phdr->pd_checksum, + pg_checksum_page(page, absolute_blkno)); } /* @@ -230,202 +264,262 @@ read_page_from_file(pgFile *file, BlockNumber blknum, * should be a pointer to allocated BLCKSZ of bytes. * * Prints appropriate warnings/errors/etc into log. 
- * Returns 0 if page was successfully retrieved - * SkipCurrentPage(-3) if we need to skip this page - * PageIsTruncated(-2) if the page was truncated + * Returns: + * PageIsOk(0) if page was successfully retrieved + * PageIsTruncated(-1) if the page was truncated + * SkipCurrentPage(-2) if we need to skip this page, + * only used for DELTA backup + * PageIsCorrupted(-3) if the page checksum mismatch + * or header corruption, + * only used for checkdb + * TODO: probably we should always + * return it to the caller */ static int32 -prepare_page(backup_files_arg *arguments, +prepare_page(ConnectionArgs *conn_arg, pgFile *file, XLogRecPtr prev_backup_start_lsn, - BlockNumber blknum, BlockNumber nblocks, - FILE *in, int *n_skipped, + BlockNumber blknum, FILE *in, BackupMode backup_mode, - Page page) + Page page, bool strict, + uint32 checksum_version, + int ptrack_version_num, + const char *ptrack_schema, + const char *from_fullpath, + PageState *page_st) { - XLogRecPtr page_lsn = 0; - int try_again = 100; + int try_again = PAGE_READ_ATTEMPTS; bool page_is_valid = false; - bool page_is_truncated = false; BlockNumber absolute_blknum = file->segno * RELSEG_SIZE + blknum; /* check for interrupt */ - if (interrupted) - elog(ERROR, "Interrupted during backup"); + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during page reading"); /* * Read the page and verify its header and checksum. * Under high write load it's possible that we've read partly * flushed page, so try several times before throwing an error. */ - if (backup_mode != BACKUP_MODE_DIFF_PTRACK) + if (backup_mode != BACKUP_MODE_DIFF_PTRACK || ptrack_version_num >= 20) { - while(!page_is_valid && try_again) + int rc = 0; + while (!page_is_valid && try_again--) { - int result = read_page_from_file(file, blknum, - in, page, &page_lsn); + /* read the block */ + int read_len = fio_pread(in, page, blknum * BLCKSZ); - try_again--; - if (result == 0) + /* The block could have been truncated. It is fine. */ + if (read_len == 0) { - /* This block was truncated.*/ - page_is_truncated = true; - /* Page is not actually valid, but it is absent - * and we're not going to reread it or validate */ - page_is_valid = true; + elog(VERBOSE, "Cannot read block %u of \"%s\": " + "block truncated", blknum, from_fullpath); + return PageIsTruncated; } - - if (result == 1) - page_is_valid = true; - - /* - * If ptrack support is available use it to get invalid block - * instead of rereading it 99 times - */ - //elog(WARNING, "Checksum_Version: %i", current.checksum_version ? 
1 : 0); - - if (result == -1 && is_ptrack_support) + else if (read_len < 0) + elog(ERROR, "Cannot read block %u of \"%s\": %s", + blknum, from_fullpath, strerror(errno)); + else if (read_len != BLCKSZ) + elog(WARNING, "Cannot read block %u of \"%s\": " + "read %i of %d, try again", + blknum, from_fullpath, read_len, BLCKSZ); + else { - elog(WARNING, "File %s, block %u, try to fetch via SQL", - file->path, blknum); - break; + /* We have BLCKSZ of raw data, validate it */ + rc = validate_one_page(page, absolute_blknum, + InvalidXLogRecPtr, page_st, + checksum_version); + switch (rc) + { + case PAGE_IS_ZEROED: + elog(VERBOSE, "File: \"%s\" blknum %u, empty page", from_fullpath, blknum); + return PageIsOk; + + case PAGE_IS_VALID: + /* in DELTA mode we must compare lsn */ + if (backup_mode == BACKUP_MODE_DIFF_DELTA) + page_is_valid = true; + else + return PageIsOk; + break; + + case PAGE_HEADER_IS_INVALID: + elog(VERBOSE, "File: \"%s\" blknum %u have wrong page header, try again", + from_fullpath, blknum); + break; + + case PAGE_CHECKSUM_MISMATCH: + elog(VERBOSE, "File: \"%s\" blknum %u have wrong checksum, try again", + from_fullpath, blknum); + break; + default: + Assert(false); + } } } + /* * If page is not valid after 100 attempts to read it * throw an error. */ - if(!page_is_valid && !is_ptrack_support) - elog(ERROR, "Data file checksum mismatch. Canceling backup"); + if (!page_is_valid) + { + int elevel = ERROR; + char *errormsg = NULL; + + /* Get the details of corruption */ + if (rc == PAGE_HEADER_IS_INVALID) + get_header_errormsg(page, &errormsg); + else if (rc == PAGE_CHECKSUM_MISMATCH) + get_checksum_errormsg(page, &errormsg, + file->segno * RELSEG_SIZE + blknum); + + /* Error out in case of merge or backup without ptrack support; + * issue warning in case of checkdb or backup with ptrack support + */ + if (!strict) + elevel = WARNING; + + if (errormsg) + elog(elevel, "Corruption detected in file \"%s\", block %u: %s", + from_fullpath, blknum, errormsg); + else + elog(elevel, "Corruption detected in file \"%s\", block %u", + from_fullpath, blknum); + + pg_free(errormsg); + return PageIsCorrupted; + } + + /* Checkdb not going futher */ + if (!strict) + return PageIsOk; } - if (backup_mode == BACKUP_MODE_DIFF_PTRACK || (!page_is_valid && is_ptrack_support)) + /* + * Get page via ptrack interface from PostgreSQL shared buffer. + * We do this only in the cases of PTRACK 1.x versions backup + */ + if (backup_mode == BACKUP_MODE_DIFF_PTRACK + && (ptrack_version_num >= 15 && ptrack_version_num < 20)) { + int rc = 0; size_t page_size = 0; Page ptrack_page = NULL; - ptrack_page = (Page) pg_ptrack_get_block(arguments, file->dbOid, file->tblspcOid, - file->relOid, absolute_blknum, &page_size); + ptrack_page = (Page) pg_ptrack_get_block(conn_arg, file->dbOid, file->tblspcOid, + file->relOid, absolute_blknum, &page_size, + ptrack_version_num, ptrack_schema); if (ptrack_page == NULL) - { /* This block was truncated.*/ - page_is_truncated = true; - } - else if (page_size != BLCKSZ) - { - free(ptrack_page); - elog(ERROR, "File: %s, block %u, expected block size %d, but read %lu", - file->path, absolute_blknum, BLCKSZ, page_size); - } - else + return PageIsTruncated; + + if (page_size != BLCKSZ) + elog(ERROR, "File: \"%s\", block %u, expected block size %d, but read %zu", + from_fullpath, blknum, BLCKSZ, page_size); + + /* + * We need to copy the page that was successfully + * retrieved from ptrack into our output "page" parameter. 
+ */ + memcpy(page, ptrack_page, BLCKSZ); + pg_free(ptrack_page); + + /* + * UPD: It appears that it is possible to get a zeroed page or a page with an invalid header + * from shared buffer. + * Note that getting a page with a wrong checksum from shared buffer is + * acceptable. + */ + rc = validate_one_page(page, absolute_blknum, + InvalidXLogRecPtr, page_st, + checksum_version); + + /* It is ok to get zeroed page */ + if (rc == PAGE_IS_ZEROED) + return PageIsOk; + + /* Getting page with invalid header from shared buffers is unacceptable */ + if (rc == PAGE_HEADER_IS_INVALID) { - /* - * We need to copy the page that was successfully - * retreieved from ptrack into our output "page" parameter. - * We must set checksum here, because it is outdated - * in the block recieved from shared buffers. - */ - memcpy(page, ptrack_page, BLCKSZ); - free(ptrack_page); - if (is_checksum_enabled) - ((PageHeader) page)->pd_checksum = pg_checksum_page(page, absolute_blknum); + char *errormsg = NULL; + get_header_errormsg(page, &errormsg); + elog(ERROR, "Corruption detected in file \"%s\", block %u: %s", + from_fullpath, blknum, errormsg); } - /* get lsn from page, provided by pg_ptrack_get_block() */ - if (backup_mode == BACKUP_MODE_DIFF_DELTA && - file->exists_in_prev && - !page_is_truncated && - !parse_page(page, &page_lsn)) - elog(ERROR, "Cannot parse page after pg_ptrack_get_block. " - "Possible risk of a memory corruption"); + /* + * We must set checksum here, because it is outdated + * in the block received from shared buffers. + */ + if (checksum_version) + page_st->checksum = ((PageHeader) page)->pd_checksum = pg_checksum_page(page, absolute_blknum); } + /* + * Skip page if page lsn is less than START_LSN of parent backup. + * Nullified pages must be copied by DELTA backup, just to be safe. + */ if (backup_mode == BACKUP_MODE_DIFF_DELTA && file->exists_in_prev && - !page_is_truncated && - page_lsn < prev_backup_start_lsn) + page_st->lsn > 0 && + page_st->lsn < prev_backup_start_lsn) { - elog(VERBOSE, "Skipping blknum: %u in file: %s", blknum, file->path); - (*n_skipped)++; + elog(VERBOSE, "Skipping blknum %u in file: \"%s\"", blknum, from_fullpath); return SkipCurrentPage; } - if (page_is_truncated) - return PageIsTruncated; - - return 0; + return PageIsOk; } -static void +/* split this function in two: compress() and backup() */ +static int compress_and_backup_page(pgFile *file, BlockNumber blknum, FILE *in, FILE *out, pg_crc32 *crc, int page_state, Page page, - CompressAlg calg, int clevel) + CompressAlg calg, int clevel, + const char *from_fullpath, const char *to_fullpath) { - BackupPageHeader header; - size_t write_buffer_size = sizeof(header); - char write_buffer[BLCKSZ+sizeof(header)]; - char compressed_page[BLCKSZ]; - - if(page_state == SkipCurrentPage) - return; - - header.block = blknum; - header.compressed_size = page_state; - - if(page_state == PageIsTruncated) - { - /* - * The page was truncated. 
Write only header - * to know that we must truncate restored file - */ - memcpy(write_buffer, &header, sizeof(header)); - } - else + int compressed_size = 0; + size_t write_buffer_size = 0; + char write_buffer[BLCKSZ*2]; /* compressed page may require more space than uncompressed */ + BackupPageHeader* bph = (BackupPageHeader*)write_buffer; + const char *errormsg = NULL; + + /* Compress the page */ + compressed_size = do_compress(write_buffer + sizeof(BackupPageHeader), + sizeof(write_buffer) - sizeof(BackupPageHeader), + page, BLCKSZ, calg, clevel, + &errormsg); + /* Something went wrong and errormsg was assigned, throw a warning */ + if (compressed_size < 0 && errormsg != NULL) + elog(WARNING, "An error occurred while compressing block %u of file \"%s\": %s", + blknum, from_fullpath, errormsg); + + file->compress_alg = calg; /* TODO: wtf? why here? */ + + /* compression didn't work */ + if (compressed_size <= 0 || compressed_size >= BLCKSZ) { - /* The page was not truncated, so we need to compress it */ - header.compressed_size = do_compress(compressed_page, BLCKSZ, - page, BLCKSZ, calg, clevel); - - file->compress_alg = calg; - file->read_size += BLCKSZ; - Assert (header.compressed_size <= BLCKSZ); - - /* The page was successfully compressed. */ - if (header.compressed_size > 0) - { - memcpy(write_buffer, &header, sizeof(header)); - memcpy(write_buffer + sizeof(header), - compressed_page, header.compressed_size); - write_buffer_size += MAXALIGN(header.compressed_size); - } - /* Nonpositive value means that compression failed. Write it as is. */ - else - { - header.compressed_size = BLCKSZ; - memcpy(write_buffer, &header, sizeof(header)); - memcpy(write_buffer + sizeof(header), page, BLCKSZ); - write_buffer_size += header.compressed_size; - } + /* Do not compress page */ + memcpy(write_buffer + sizeof(BackupPageHeader), page, BLCKSZ); + compressed_size = BLCKSZ; } - - /* elog(VERBOSE, "backup blkno %u, compressed_size %d write_buffer_size %ld", - blknum, header.compressed_size, write_buffer_size); */ + bph->block = blknum; + bph->compressed_size = compressed_size; + write_buffer_size = compressed_size + sizeof(BackupPageHeader); /* Update CRC */ - COMP_CRC32C(*crc, write_buffer, write_buffer_size); + COMP_FILE_CRC32(true, *crc, write_buffer, write_buffer_size); /* write data page */ - if(fwrite(write_buffer, 1, write_buffer_size, out) != write_buffer_size) - { - int errno_tmp = errno; - - fclose(in); - fclose(out); - elog(ERROR, "File: %s, cannot write backup at block %u : %s", - file->path, blknum, strerror(errno_tmp)); - } + if (fio_fwrite(out, write_buffer, write_buffer_size) != write_buffer_size) + elog(ERROR, "File: \"%s\", cannot write at block %u: %s", + to_fullpath, blknum, strerror(errno)); file->write_size += write_buffer_size; + file->uncompressed_size += BLCKSZ; + + return compressed_size; } /* @@ -436,20 +530,31 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum, * incremental backup), validate checksum, optionally compress and write to * backup with special header. 
*/ -bool -backup_data_file(backup_files_arg* arguments, - const char *to_path, pgFile *file, +void +backup_data_file(ConnectionArgs* conn_arg, pgFile *file, + const char *from_fullpath, const char *to_fullpath, XLogRecPtr prev_backup_start_lsn, BackupMode backup_mode, - CompressAlg calg, int clevel) + CompressAlg calg, int clevel, uint32 checksum_version, + int ptrack_version_num, const char *ptrack_schema, + HeaderMap *hdr_map, bool is_merge) { - FILE *in; - FILE *out; - BlockNumber blknum = 0; - BlockNumber nblocks = 0; - int n_blocks_skipped = 0; - int n_blocks_read = 0; - int page_state; - char curr_page[BLCKSZ]; + int rc; + bool use_pagemap; + char *errmsg = NULL; + BlockNumber err_blknum = 0; + /* page headers */ + BackupPageHeader2 *headers = NULL; + + /* sanity */ + if (file->size % BLCKSZ != 0) + elog(WARNING, "File: \"%s\", invalid file size %zu", from_fullpath, file->size); + + /* + * Compute expected number of blocks in the file. + * NOTE This is a normal situation, if the file size has changed + * since the moment we computed it. + */ + file->n_blocks = file->size/BLCKSZ; /* * Skip unchanged file only if it exists in previous backup. @@ -462,946 +567,1767 @@ backup_data_file(backup_files_arg* arguments, file->exists_in_prev && !file->pagemap_isabsent) { /* - * There are no changed blocks since last backup. We want make + * There are no changed blocks since last backup. We want to make * incremental backup, so we should exit. */ - elog(VERBOSE, "Skipping the unchanged file: %s", file->path); - return false; + file->write_size = BYTES_INVALID; + return; } /* reset size summary */ file->read_size = 0; file->write_size = 0; - INIT_CRC32C(file->crc); - - /* open backup mode file for read */ - in = fopen(file->path, PG_BINARY_R); - if (in == NULL) - { - FIN_CRC32C(file->crc); - - /* - * If file is not found, this is not en error. - * It could have been deleted by concurrent postgres transaction. - */ - if (errno == ENOENT) - { - elog(LOG, "File \"%s\" is not found", file->path); - return false; - } - - elog(ERROR, "cannot open file \"%s\": %s", - file->path, strerror(errno)); - } - - if (file->size % BLCKSZ != 0) - { - fclose(in); - elog(ERROR, "File: %s, invalid file size %lu", file->path, file->size); - } - - /* - * Compute expected number of blocks in the file. - * NOTE This is a normal situation, if the file size has changed - * since the moment we computed it. - */ - nblocks = file->size/BLCKSZ; - - /* open backup file for write */ - out = fopen(to_path, PG_BINARY_W); - if (out == NULL) - { - int errno_tmp = errno; - fclose(in); - elog(ERROR, "cannot open backup file \"%s\": %s", - to_path, strerror(errno_tmp)); - } + file->uncompressed_size = 0; + INIT_FILE_CRC32(true, file->crc); /* * Read each page, verify checksum and write it to backup. * If page map is empty or file is not present in previous backup * backup all pages of the relation. * - * We will enter here if backup_mode is FULL or DELTA. + * In PTRACK 1.x there was a problem + * of data files with missing _ptrack map. + * Such files should be fully copied. 
*/ - if (file->pagemap.bitmapsize == PageBitmapIsEmpty || - file->pagemap_isabsent || !file->exists_in_prev) + + if (file->pagemap.bitmapsize == PageBitmapIsEmpty || + file->pagemap_isabsent || !file->exists_in_prev || + !file->pagemap.bitmap) + use_pagemap = false; + else + use_pagemap = true; + + /* Remote mode */ + if (fio_is_remote(FIO_DB_HOST)) { - for (blknum = 0; blknum < nblocks; blknum++) - { - page_state = prepare_page(arguments, file, prev_backup_start_lsn, - blknum, nblocks, in, &n_blocks_skipped, - backup_mode, curr_page); - compress_and_backup_page(file, blknum, in, out, &(file->crc), - page_state, curr_page, calg, clevel); - n_blocks_read++; - if (page_state == PageIsTruncated) - break; - } - if (backup_mode == BACKUP_MODE_DIFF_DELTA) - file->n_blocks = n_blocks_read; + + rc = fio_send_pages(to_fullpath, from_fullpath, file, + /* send prev backup START_LSN */ + backup_mode == BACKUP_MODE_DIFF_DELTA && + file->exists_in_prev ? prev_backup_start_lsn : InvalidXLogRecPtr, + calg, clevel, checksum_version, + /* send pagemap if any */ + use_pagemap, + /* variables for error reporting */ + &err_blknum, &errmsg, &headers); } - /* - * If page map is not empty we scan only changed blocks. - * - * We will enter here if backup_mode is PAGE or PTRACK. - */ else { - datapagemap_iterator_t *iter; - iter = datapagemap_iterate(&file->pagemap); - while (datapagemap_next(iter, &blknum)) - { - page_state = prepare_page(arguments, file, prev_backup_start_lsn, - blknum, nblocks, in, &n_blocks_skipped, - backup_mode, curr_page); - compress_and_backup_page(file, blknum, in, out, &(file->crc), - page_state, curr_page, calg, clevel); - n_blocks_read++; - if (page_state == PageIsTruncated) - break; - } + /* TODO: stop handling errors internally */ + rc = send_pages(conn_arg, to_fullpath, from_fullpath, file, + /* send prev backup START_LSN */ + backup_mode == BACKUP_MODE_DIFF_DELTA && + file->exists_in_prev ? prev_backup_start_lsn : InvalidXLogRecPtr, + calg, clevel, checksum_version, use_pagemap, + &headers, backup_mode, ptrack_version_num, ptrack_schema); + } - pg_free(file->pagemap.bitmap); - pg_free(iter); + /* check for errors */ + if (rc == FILE_MISSING) + { + elog(is_merge ? 
ERROR : LOG, "File not found: \"%s\"", from_fullpath); + file->write_size = FILE_NOT_FOUND; + goto cleanup; } - /* update file permission */ - if (chmod(to_path, FILE_PERMISSION) == -1) + else if (rc == WRITE_FAILED) + elog(ERROR, "Cannot write block %u of \"%s\": %s", + err_blknum, to_fullpath, strerror(errno)); + + else if (rc == PAGE_CORRUPTION) { - int errno_tmp = errno; - fclose(in); - fclose(out); - elog(ERROR, "cannot change mode of \"%s\": %s", file->path, - strerror(errno_tmp)); + if (errmsg) + elog(ERROR, "Corruption detected in file \"%s\", block %u: %s", + from_fullpath, err_blknum, errmsg); + else + elog(ERROR, "Corruption detected in file \"%s\", block %u", + from_fullpath, err_blknum); + } + /* OPEN_FAILED and READ_FAILED */ + else if (rc == OPEN_FAILED) + { + if (errmsg) + elog(ERROR, "%s", errmsg); + else + elog(ERROR, "Cannot open file \"%s\"", from_fullpath); + } + else if (rc == READ_FAILED) + { + if (errmsg) + elog(ERROR, "%s", errmsg); + else + elog(ERROR, "Cannot read file \"%s\"", from_fullpath); } - if (fflush(out) != 0 || - fsync(fileno(out)) != 0 || - fclose(out)) - elog(ERROR, "cannot write backup file \"%s\": %s", - to_path, strerror(errno)); - fclose(in); + file->read_size = rc * BLCKSZ; - FIN_CRC32C(file->crc); + /* refresh n_blocks for FULL and DELTA */ + if (backup_mode == BACKUP_MODE_FULL || + backup_mode == BACKUP_MODE_DIFF_DELTA) + file->n_blocks = file->read_size / BLCKSZ; - /* - * If we have pagemap then file in the backup can't be a zero size. - * Otherwise, we will clear the last file. - */ - if (n_blocks_read != 0 && n_blocks_read == n_blocks_skipped) + /* Determine that file didn`t changed in case of incremental backup */ + if (backup_mode != BACKUP_MODE_FULL && + file->exists_in_prev && + file->write_size == 0 && + file->n_blocks > 0) { - if (remove(to_path) == -1) - elog(ERROR, "cannot remove file \"%s\": %s", to_path, - strerror(errno)); - return false; + file->write_size = BYTES_INVALID; } - return true; +cleanup: + + /* finish CRC calculation */ + FIN_FILE_CRC32(true, file->crc); + + /* dump page headers */ + write_page_headers(headers, file, hdr_map, is_merge); + + pg_free(errmsg); + pg_free(file->pagemap.bitmap); + pg_free(headers); } /* - * Restore files in the from_root directory to the to_root directory with - * same relative path. - * - * If write_header is true then we add header to each restored block, currently - * it is used for MERGE command. + * Backup non data file + * We do not apply compression to this file. + * If file exists in previous backup, then compare checksums + * and make a decision about copying or skiping the file. 
*/ void -restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, - bool write_header) +backup_non_data_file(pgFile *file, pgFile *prev_file, + const char *from_fullpath, const char *to_fullpath, + BackupMode backup_mode, time_t parent_backup_time, + bool missing_ok) { - FILE *in = NULL; - FILE *out = NULL; - BackupPageHeader header; - BlockNumber blknum = 0, - truncate_from = 0; - bool need_truncate = false; - - /* BYTES_INVALID allowed only in case of restoring file from DELTA backup */ - if (file->write_size != BYTES_INVALID) + /* special treatment for global/pg_control */ + if (file->external_dir_num == 0 && strcmp(file->rel_path, XLOG_CONTROL_FILE) == 0) { - /* open backup mode file for read */ - in = fopen(file->path, PG_BINARY_R); - if (in == NULL) - { - elog(ERROR, "cannot open backup file \"%s\": %s", file->path, - strerror(errno)); - } + copy_pgcontrol_file(from_fullpath, FIO_DB_HOST, + to_fullpath, FIO_BACKUP_HOST, file); + return; } /* - * Open backup file for write. We use "r+" at first to overwrite only - * modified pages for differential restore. If the file does not exist, - * re-open it with "w" to create an empty file. - */ - out = fopen(to_path, PG_BINARY_R "+"); - if (out == NULL && errno == ENOENT) - out = fopen(to_path, PG_BINARY_W); - if (out == NULL) - { - int errno_tmp = errno; - fclose(in); - elog(ERROR, "cannot open restore target file \"%s\": %s", - to_path, strerror(errno_tmp)); - } - - while (true) + * If nonedata file exists in previous backup + * and its mtime is less than parent backup start time ... */ + if (prev_file && file->exists_in_prev && + file->mtime <= parent_backup_time) { - off_t write_pos; - size_t read_len; - DataPage compressed_page; /* used as read buffer */ - DataPage page; - - /* File didn`t changed. Nothig to copy */ - if (file->write_size == BYTES_INVALID) - break; - /* - * We need to truncate result file if data file in a incremental backup - * less than data file in a full backup. We know it thanks to n_blocks. - * - * It may be equal to -1, then we don't want to truncate the result - * file. - */ - if (file->n_blocks != BLOCKNUM_INVALID && - (blknum + 1) > file->n_blocks) - { - truncate_from = blknum; - need_truncate = true; - break; - } + file->crc = fio_get_crc32(from_fullpath, FIO_DB_HOST, false); - /* read BackupPageHeader */ - read_len = fread(&header, 1, sizeof(header), in); - if (read_len != sizeof(header)) + /* ...and checksum is the same... */ + if (EQ_TRADITIONAL_CRC32(file->crc, prev_file->crc)) { - int errno_tmp = errno; - if (read_len == 0 && feof(in)) - break; /* EOF found */ - else if (read_len != 0 && feof(in)) - elog(ERROR, - "odd size page found at block %u of \"%s\"", - blknum, file->path); - else - elog(ERROR, "cannot read header of block %u of \"%s\": %s", - blknum, file->path, strerror(errno_tmp)); + file->write_size = BYTES_INVALID; + return; /* ...skip copying file. */ } + } - if (header.block < blknum) - elog(ERROR, "backup is broken at file->path %s block %u", - file->path, blknum); + backup_non_data_file_internal(from_fullpath, FIO_DB_HOST, + to_fullpath, file, true); +} - blknum = header.block; +/* + * Iterate over parent backup chain and lookup given destination file in + * filelist of every chain member starting with FULL backup. + * Apply changed blocks to destination file from every backup in parent chain. 
+ */ +size_t +restore_data_file(parray *parent_chain, pgFile *dest_file, FILE *out, + const char *to_fullpath, bool use_bitmap, PageState *checksum_map, + XLogRecPtr shift_lsn, datapagemap_t *lsn_map, bool use_headers) +{ + size_t total_write_len = 0; + char *in_buf = pgut_malloc(STDIO_BUFSIZE); + int backup_seq = 0; + + /* + * FULL -> INCR -> DEST + * 2 1 0 + * Restore of backups of older versions cannot be optimized with bitmap + * because of n_blocks + */ + if (use_bitmap) + /* start with dest backup */ + backup_seq = 0; + else + /* start with full backup */ + backup_seq = parray_num(parent_chain) - 1; + +// for (i = parray_num(parent_chain) - 1; i >= 0; i--) +// for (i = 0; i < parray_num(parent_chain); i++) + while (backup_seq >= 0 && backup_seq < parray_num(parent_chain)) + { + char from_root[MAXPGPATH]; + char from_fullpath[MAXPGPATH]; + FILE *in = NULL; + + pgFile **res_file = NULL; + pgFile *tmp_file = NULL; + + /* page headers */ + BackupPageHeader2 *headers = NULL; + + pgBackup *backup = (pgBackup *) parray_get(parent_chain, backup_seq); + + if (use_bitmap) + backup_seq++; + else + backup_seq--; + + /* lookup file in intermediate backup */ + res_file = parray_bsearch(backup->files, dest_file, pgFileCompareRelPathWithExternal); + tmp_file = (res_file) ? *res_file : NULL; + + /* Destination file is not exists yet at this moment */ + if (tmp_file == NULL) + continue; + + /* + * Skip file if it haven't changed since previous backup + * and thus was not backed up. + */ + if (tmp_file->write_size == BYTES_INVALID) + continue; + + /* If file was truncated in intermediate backup, + * it is ok not to truncate it now, because old blocks will be + * overwritten by new blocks from next backup. + */ + if (tmp_file->write_size == 0) + continue; + + /* + * At this point we are sure, that something is going to be copied + * Open source file. + */ + join_path_components(from_root, backup->root_dir, DATABASE_DIR); + join_path_components(from_fullpath, from_root, tmp_file->rel_path); + + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + elog(ERROR, "Cannot open backup file \"%s\": %s", from_fullpath, + strerror(errno)); + + /* set stdio buffering for input data file */ + setvbuf(in, in_buf, _IOFBF, STDIO_BUFSIZE); + + /* get headers for this file */ + if (use_headers && tmp_file->n_headers > 0) + headers = get_data_file_headers(&(backup->hdr_map), tmp_file, + parse_program_version(backup->program_version), + true); + + if (use_headers && !headers && tmp_file->n_headers > 0) + elog(ERROR, "Failed to get page headers for file \"%s\"", from_fullpath); + + /* + * Restore the file. + * Datafiles are backed up block by block and every block + * have BackupPageHeader with meta information, so we cannot just + * copy the file from backup. + */ + total_write_len += restore_data_file_internal(in, out, tmp_file, + parse_program_version(backup->program_version), + from_fullpath, to_fullpath, dest_file->n_blocks, + use_bitmap ? &(dest_file)->pagemap : NULL, + checksum_map, backup->checksum_version, + /* shiftmap can be used only if backup state precedes the shift */ + backup->stop_lsn <= shift_lsn ? lsn_map : NULL, + headers); + + if (fclose(in) != 0) + elog(ERROR, "Cannot close file \"%s\": %s", from_fullpath, + strerror(errno)); + + pg_free(headers); + +// datapagemap_print_debug(&(dest_file)->pagemap); + } + pg_free(in_buf); + + return total_write_len; +} + +/* Restore block from "in" file to "out" file. 
+ * If "nblocks" is greater than zero, then skip restoring blocks, + * whose position if greater than "nblocks". + * If map is NULL, then page bitmap cannot be used for restore optimization + * Page bitmap optimize restore of incremental chains, consisting of more than one + * backup. We restoring from newest to oldest and page, once restored, marked in map. + * When the same page, but in older backup, encountered, we check the map, if it is + * marked as already restored, then page is skipped. + */ +size_t +restore_data_file_internal(FILE *in, FILE *out, pgFile *file, uint32 backup_version, + const char *from_fullpath, const char *to_fullpath, int nblocks, + datapagemap_t *map, PageState *checksum_map, int checksum_version, + datapagemap_t *lsn_map, BackupPageHeader2 *headers) +{ + BlockNumber blknum = 0; + int n_hdr = -1; + size_t write_len = 0; + off_t cur_pos_out = 0; + off_t cur_pos_in = 0; + + /* should not be possible */ + Assert(!(backup_version >= 20400 && file->n_headers <= 0)); + + /* + * We rely on stdio buffering of input and output. + * For buffering to be efficient, we try to minimize the + * number of lseek syscalls, because it forces buffer flush. + * For that, we track current write position in + * output file and issue fseek only when offset of block to be + * written not equal to current write position, which happens + * a lot when blocks from incremental backup are restored, + * but should never happen in case of blocks from FULL backup. + */ + if (fio_fseek(out, cur_pos_out) < 0) + elog(ERROR, "Cannot seek block %u of \"%s\": %s", + blknum, to_fullpath, strerror(errno)); + + for (;;) + { + off_t write_pos; + size_t len; + size_t read_len; + DataPage page; + int32 compressed_size = 0; + bool is_compressed = false; + + /* incremental restore vars */ + uint16 page_crc = 0; + XLogRecPtr page_lsn = InvalidXLogRecPtr; + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during data file restore"); + + /* newer backups have headers in separate storage */ + if (headers) + { + n_hdr++; + if (n_hdr >= file->n_headers) + break; + + blknum = headers[n_hdr].block; + page_lsn = headers[n_hdr].lsn; + page_crc = headers[n_hdr].checksum; + /* calculate payload size by comparing current and next page positions, + * page header is not included */ + compressed_size = headers[n_hdr+1].pos - headers[n_hdr].pos - sizeof(BackupPageHeader); + + Assert(compressed_size > 0); + Assert(compressed_size <= BLCKSZ); - if (header.compressed_size == PageIsTruncated) + read_len = compressed_size + sizeof(BackupPageHeader); + } + else + { + /* We get into this function either when restoring old backup + * or when merging something. Align read_len only when restoring + * or merging old backups. + */ + if (get_page_header(in, from_fullpath, &(page).bph, NULL, false)) + { + cur_pos_in += sizeof(BackupPageHeader); + + /* backward compatibility kludge TODO: remove in 3.0 */ + blknum = page.bph.block; + compressed_size = page.bph.compressed_size; + + /* this has a potential to backfire when retrying merge of old backups, + * so we just forbid the retrying of failed merges between versions >= 2.4.0 and + * version < 2.4.0 + */ + if (backup_version >= 20400) + read_len = compressed_size; + else + /* For some unknown and possibly dump reason I/O operations + * in versions < 2.4.0 were always aligned to 8 bytes. + * Now we have to deal with backward compatibility. 
+ */ + read_len = MAXALIGN(compressed_size); + + } + else + break; + } + + /* + * Backward compatibility kludge: in the good old days + * n_blocks attribute was available only in DELTA backups. + * File truncate in PAGE and PTRACK happened on the fly when + * special value PageIsTruncated is encountered. + * It was inefficient. + * + * Nowadays every backup type has n_blocks, so instead of + * writing and then truncating redundant data, writing + * is not happening in the first place. + * TODO: remove in 3.0.0 + */ + if (compressed_size == PageIsTruncated) { /* - * Backup contains information that this block was truncated. + * Block header contains information that this block was truncated. * We need to truncate file to this length. */ - truncate_from = blknum; - need_truncate = true; + + elog(VERBOSE, "Truncate file \"%s\" to block %u", to_fullpath, blknum); + + /* To correctly truncate file, we must first flush STDIO buffers */ + if (fio_fflush(out) != 0) + elog(ERROR, "Cannot flush file \"%s\": %s", to_fullpath, strerror(errno)); + + /* Set position to the start of file */ + if (fio_fseek(out, 0) < 0) + elog(ERROR, "Cannot seek to the start of file \"%s\": %s", to_fullpath, strerror(errno)); + + if (fio_ftruncate(out, blknum * BLCKSZ) != 0) + elog(ERROR, "Cannot truncate file \"%s\": %s", to_fullpath, strerror(errno)); + break; } - Assert(header.compressed_size <= BLCKSZ); + Assert(compressed_size > 0); + Assert(compressed_size <= BLCKSZ); + + /* no point in writing redundant data */ + if (nblocks > 0 && blknum >= nblocks) + break; + + if (compressed_size > BLCKSZ) + elog(ERROR, "Size of a blknum %i exceed BLCKSZ: %i", blknum, compressed_size); - read_len = fread(compressed_page.data, 1, - MAXALIGN(header.compressed_size), in); - if (read_len != MAXALIGN(header.compressed_size)) - elog(ERROR, "cannot read block %u of \"%s\" read %lu of %d", - blknum, file->path, read_len, header.compressed_size); + /* Incremental restore in LSN mode */ + if (map && lsn_map && datapagemap_is_set(lsn_map, blknum)) + datapagemap_add(map, blknum); - if (header.compressed_size != BLCKSZ) + if (map && checksum_map && checksum_map[blknum].checksum != 0) { - int32 uncompressed_size = 0; + //elog(INFO, "HDR CRC: %u, MAP CRC: %u", page_crc, checksum_map[blknum].checksum); + /* + * The heart of incremental restore in CHECKSUM mode + * If page in backup has the same checksum and lsn as + * page in backup, then page can be skipped. + */ + if (page_crc == checksum_map[blknum].checksum && + page_lsn == checksum_map[blknum].lsn) + { + datapagemap_add(map, blknum); + } + } - uncompressed_size = do_decompress(page.data, BLCKSZ, - compressed_page.data, - MAXALIGN(header.compressed_size), - file->compress_alg); + /* if this page is marked as already restored, then skip it */ + if (map && datapagemap_is_set(map, blknum)) + { + /* Backward compatibility kludge TODO: remove in 3.0 + * go to the next page. + */ + if (!headers && fseek(in, read_len, SEEK_CUR) != 0) + elog(ERROR, "Cannot seek block %u of \"%s\": %s", + blknum, from_fullpath, strerror(errno)); + continue; + } - if (uncompressed_size != BLCKSZ) - elog(ERROR, "page of file \"%s\" uncompressed to %d bytes. != BLCKSZ", - file->path, uncompressed_size); + if (headers && + cur_pos_in != headers[n_hdr].pos) + { + if (fseek(in, headers[n_hdr].pos, SEEK_SET) != 0) + elog(ERROR, "Cannot seek to offset %u of \"%s\": %s", + headers[n_hdr].pos, from_fullpath, strerror(errno)); + + cur_pos_in = headers[n_hdr].pos; } - write_pos = (write_header) ? 
blknum * (BLCKSZ + sizeof(header)) : - blknum * BLCKSZ; + /* read a page from file */ + if (headers) + len = fread(&page, 1, read_len, in); + else + len = fread(page.data, 1, read_len, in); + + if (len != read_len) + elog(ERROR, "Cannot read block %u file \"%s\": %s", + blknum, from_fullpath, strerror(errno)); + + cur_pos_in += read_len; + + /* + * if page size is smaller than BLCKSZ, decompress the page. + * BUGFIX for versions < 2.0.23: if page size is equal to BLCKSZ. + * we have to check, whether it is compressed or not using + * page_may_be_compressed() function. + */ + if (compressed_size != BLCKSZ + || page_may_be_compressed(page.data, file->compress_alg, + backup_version)) + { + is_compressed = true; + } /* * Seek and write the restored page. + * When restoring file from FULL backup, pages are written sequentially, + * so there is no need to issue fseek for every page. */ - if (fseek(out, write_pos, SEEK_SET) < 0) - elog(ERROR, "cannot seek block %u of \"%s\": %s", - blknum, to_path, strerror(errno)); + write_pos = blknum * BLCKSZ; - if (write_header) + if (cur_pos_out != write_pos) { - if (fwrite(&header, 1, sizeof(header), out) != sizeof(header)) - elog(ERROR, "cannot write header of block %u of \"%s\": %s", - blknum, file->path, strerror(errno)); + if (fio_fseek(out, write_pos) < 0) + elog(ERROR, "Cannot seek block %u of \"%s\": %s", + blknum, to_fullpath, strerror(errno)); + + cur_pos_out = write_pos; } - if (header.compressed_size < BLCKSZ) + /* If page is compressed and restore is in remote mode, send compressed + * page to the remote side. + */ + if (is_compressed) { - if (fwrite(page.data, 1, BLCKSZ, out) != BLCKSZ) - elog(ERROR, "cannot write block %u of \"%s\": %s", - blknum, file->path, strerror(errno)); + ssize_t rc; + rc = fio_fwrite_compressed(out, page.data, compressed_size, file->compress_alg); + + if (!fio_is_remote_file(out) && rc != BLCKSZ) + elog(ERROR, "Cannot write block %u of \"%s\": %s, size: %u", + blknum, to_fullpath, strerror(errno), compressed_size); } else { - /* if page wasn't compressed, we've read full block */ - if (fwrite(compressed_page.data, 1, BLCKSZ, out) != BLCKSZ) - elog(ERROR, "cannot write block %u of \"%s\": %s", - blknum, file->path, strerror(errno)); + if (fio_fwrite(out, page.data, BLCKSZ) != BLCKSZ) + elog(ERROR, "Cannot write block %u of \"%s\": %s", + blknum, to_fullpath, strerror(errno)); } + + write_len += BLCKSZ; + cur_pos_out += BLCKSZ; /* update current write position */ + + /* Mark page as restored to avoid reading this page when restoring parent backups */ + if (map) + datapagemap_add(map, blknum); } - /* - * DELTA backup have no knowledge about truncated blocks as PAGE or PTRACK do - * But during DELTA backup we read every file in PGDATA and thus DELTA backup - * knows exact size of every file at the time of backup. - * So when restoring file from DELTA backup we, knowning it`s size at - * a time of a backup, can truncate file to this size. - */ - if (allow_truncate && file->n_blocks != BLOCKNUM_INVALID && !need_truncate) + elog(VERBOSE, "Copied file \"%s\": %lu bytes", from_fullpath, write_len); + return write_len; +} + +/* + * Copy file to backup. + * We do not apply compression to these files, because + * it is either small control file or already compressed cfs file. 
+ */ +void +restore_non_data_file_internal(FILE *in, FILE *out, pgFile *file, + const char *from_fullpath, const char *to_fullpath) +{ + size_t read_len = 0; + char *buf = pgut_malloc(STDIO_BUFSIZE); /* 64kB buffer */ + + /* copy content */ + for (;;) { - size_t file_size = 0; + read_len = 0; + + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during nonedata file restore"); - /* get file current size */ - fseek(out, 0, SEEK_END); - file_size = ftell(out); + read_len = fread(buf, 1, STDIO_BUFSIZE, in); + + if (ferror(in)) + elog(ERROR, "Cannot read backup file \"%s\": %s", + from_fullpath, strerror(errno)); - if (file_size > file->n_blocks * BLCKSZ) + if (read_len > 0) { - truncate_from = file->n_blocks; - need_truncate = true; + if (fio_fwrite(out, buf, read_len) != read_len) + elog(ERROR, "Cannot write to \"%s\": %s", to_fullpath, + strerror(errno)); } + + if (feof(in)) + break; } - if (need_truncate) - { - off_t write_pos; + pg_free(buf); + + elog(VERBOSE, "Copied file \"%s\": %lu bytes", from_fullpath, file->write_size); +} + +size_t +restore_non_data_file(parray *parent_chain, pgBackup *dest_backup, + pgFile *dest_file, FILE *out, const char *to_fullpath, + bool already_exists) +{ + char from_root[MAXPGPATH]; + char from_fullpath[MAXPGPATH]; + FILE *in = NULL; - write_pos = (write_header) ? truncate_from * (BLCKSZ + sizeof(header)) : - truncate_from * BLCKSZ; + pgFile *tmp_file = NULL; + pgBackup *tmp_backup = NULL; + /* Check if full copy of destination file is available in destination backup */ + if (dest_file->write_size > 0) + { + tmp_file = dest_file; + tmp_backup = dest_backup; + } + else + { /* - * Truncate file to this length. + * Iterate over parent chain starting from direct parent of destination + * backup to oldest backup in chain, and look for the first + * full copy of destination file. + * Full copy is latest possible destination file with size equal or + * greater than zero. */ - if (ftruncate(fileno(out), write_pos) != 0) - elog(ERROR, "cannot truncate \"%s\": %s", - file->path, strerror(errno)); - elog(INFO, "Delta truncate file %s to block %u", - file->path, truncate_from); + tmp_backup = dest_backup->parent_backup_link; + while (tmp_backup) + { + pgFile **res_file = NULL; + + /* lookup file in intermediate backup */ + res_file = parray_bsearch(tmp_backup->files, dest_file, pgFileCompareRelPathWithExternal); + tmp_file = (res_file) ? *res_file : NULL; + + /* + * It should not be possible not to find destination file in intermediate + * backup, without encountering full copy first. 
+ */ + if (!tmp_file) + { + elog(ERROR, "Failed to locate nonedata file \"%s\" in backup %s", + dest_file->rel_path, base36enc(tmp_backup->start_time)); + continue; + } + + /* Full copy is found and it is null sized, nothing to do here */ + if (tmp_file->write_size == 0) + { + /* In case of incremental restore truncate file just to be safe */ + if (already_exists && fio_ftruncate(out, 0)) + elog(ERROR, "Cannot truncate file \"%s\": %s", + to_fullpath, strerror(errno)); + return 0; + } + + /* Full copy is found */ + if (tmp_file->write_size > 0) + break; + + tmp_backup = tmp_backup->parent_backup_link; + } } - /* update file permission */ - if (chmod(to_path, file->mode) == -1) + /* sanity */ + if (!tmp_backup) + elog(ERROR, "Failed to locate a backup containing full copy of nonedata file \"%s\"", + to_fullpath); + + if (!tmp_file) + elog(ERROR, "Failed to locate a full copy of nonedata file \"%s\"", to_fullpath); + + if (tmp_file->write_size <= 0) + elog(ERROR, "Full copy of nonedata file has invalid size: %li. " + "Metadata corruption in backup %s in file: \"%s\"", + tmp_file->write_size, base36enc(tmp_backup->start_time), + to_fullpath); + + /* incremental restore */ + if (already_exists) { - int errno_tmp = errno; + /* compare checksums of already existing file and backup file */ + pg_crc32 file_crc = fio_get_crc32(to_fullpath, FIO_DB_HOST, false); + + if (file_crc == tmp_file->crc) + { + elog(VERBOSE, "Already existing nonedata file \"%s\" has the same checksum, skip restore", + to_fullpath); + return 0; + } - if (in) - fclose(in); - fclose(out); - elog(ERROR, "cannot change mode of \"%s\": %s", to_path, - strerror(errno_tmp)); + /* Checksum mismatch, truncate file and overwrite it */ + if (fio_ftruncate(out, 0)) + elog(ERROR, "Cannot truncate file \"%s\": %s", + to_fullpath, strerror(errno)); } - if (fflush(out) != 0 || - fsync(fileno(out)) != 0 || - fclose(out)) - elog(ERROR, "cannot write \"%s\": %s", to_path, strerror(errno)); - if (in) - fclose(in); + if (tmp_file->external_dir_num == 0) + join_path_components(from_root, tmp_backup->root_dir, DATABASE_DIR); + else + { + char external_prefix[MAXPGPATH]; + + join_path_components(external_prefix, tmp_backup->root_dir, EXTERNAL_DIR); + makeExternalDirPathByNum(from_root, external_prefix, tmp_file->external_dir_num); + } + + join_path_components(from_fullpath, from_root, dest_file->rel_path); + + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + elog(ERROR, "Cannot open backup file \"%s\": %s", from_fullpath, + strerror(errno)); + + /* disable stdio buffering for nonedata files */ + setvbuf(in, NULL, _IONBF, BUFSIZ); + + /* do actual work */ + restore_non_data_file_internal(in, out, tmp_file, from_fullpath, to_fullpath); + + if (fclose(in) != 0) + elog(ERROR, "Cannot close file \"%s\": %s", from_fullpath, + strerror(errno)); + + return tmp_file->write_size; } /* * Copy file to backup. * We do not apply compression to these files, because * it is either small control file or already compressed cfs file. 
+ * TODO: optimize remote copying + */ +void +backup_non_data_file_internal(const char *from_fullpath, + fio_location from_location, + const char *to_fullpath, pgFile *file, + bool missing_ok) +{ + FILE *in = NULL; + FILE *out = NULL; + ssize_t read_len = 0; + char *buf = NULL; + + INIT_FILE_CRC32(true, file->crc); + + /* reset size summary */ + file->read_size = 0; + file->write_size = 0; + file->uncompressed_size = 0; + + /* open backup file for write */ + out = fopen(to_fullpath, PG_BINARY_W); + if (out == NULL) + elog(ERROR, "Cannot open destination file \"%s\": %s", + to_fullpath, strerror(errno)); + + /* update file permission */ + if (chmod(to_fullpath, file->mode) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, + strerror(errno)); + + /* backup remote file */ + if (fio_is_remote(FIO_DB_HOST)) + { + char *errmsg = NULL; + int rc = fio_send_file(from_fullpath, to_fullpath, out, file, &errmsg); + + /* handle errors */ + if (rc == FILE_MISSING) + { + /* maybe deleted, it's not error in case of backup */ + if (missing_ok) + { + elog(LOG, "File \"%s\" is not found", from_fullpath); + file->write_size = FILE_NOT_FOUND; + goto cleanup; + } + else + elog(ERROR, "File \"%s\" is not found", from_fullpath); + } + else if (rc == WRITE_FAILED) + elog(ERROR, "Cannot write to \"%s\": %s", to_fullpath, strerror(errno)); + else if (rc != SEND_OK) + { + if (errmsg) + elog(ERROR, "%s", errmsg); + else + elog(ERROR, "Cannot access remote file \"%s\"", from_fullpath); + } + + pg_free(errmsg); + } + /* backup local file */ + else + { + /* open source file for read */ + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + { + /* maybe deleted, it's not error in case of backup */ + if (errno == ENOENT) + { + if (missing_ok) + { + elog(LOG, "File \"%s\" is not found", from_fullpath); + file->write_size = FILE_NOT_FOUND; + goto cleanup; + } + else + elog(ERROR, "File \"%s\" is not found", from_fullpath); + } + + elog(ERROR, "Cannot open file \"%s\": %s", from_fullpath, + strerror(errno)); + } + + /* disable stdio buffering for local input/output files to avoid triple buffering */ + setvbuf(in, NULL, _IONBF, BUFSIZ); + setvbuf(out, NULL, _IONBF, BUFSIZ); + + /* allocate 64kB buffer */ + buf = pgut_malloc(CHUNK_SIZE); + + /* copy content and calc CRC */ + for (;;) + { + read_len = fread(buf, 1, CHUNK_SIZE, in); + + if (ferror(in)) + elog(ERROR, "Cannot read from file \"%s\": %s", + from_fullpath, strerror(errno)); + + if (read_len > 0) + { + if (fwrite(buf, 1, read_len, out) != read_len) + elog(ERROR, "Cannot write to file \"%s\": %s", to_fullpath, + strerror(errno)); + + /* update CRC */ + COMP_FILE_CRC32(true, file->crc, buf, read_len); + file->read_size += read_len; + } + + if (feof(in)) + break; + } + } + + file->write_size = (int64) file->read_size; + + if (file->write_size > 0) + file->uncompressed_size = file->write_size; + +cleanup: + /* finish CRC calculation and store into pgFile */ + FIN_FILE_CRC32(true, file->crc); + + if (in && fclose(in)) + elog(ERROR, "Cannot close the file \"%s\": %s", from_fullpath, strerror(errno)); + + if (out && fclose(out)) + elog(ERROR, "Cannot close the file \"%s\": %s", to_fullpath, strerror(errno)); + + pg_free(buf); +} + +/* + * Create empty file, used for partial restore */ bool -copy_file(const char *from_root, const char *to_root, pgFile *file) +create_empty_file(fio_location from_location, const char *to_root, + fio_location to_location, pgFile *file) { char to_path[MAXPGPATH]; - FILE *in; FILE *out; + + /* open file for write */ + 
join_path_components(to_path, to_root, file->rel_path); + out = fio_fopen(to_path, PG_BINARY_W, to_location); + + if (out == NULL) + elog(ERROR, "Cannot open destination file \"%s\": %s", + to_path, strerror(errno)); + + /* update file permission */ + if (fio_chmod(to_path, file->mode, to_location) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", to_path, + strerror(errno)); + + if (fio_fclose(out)) + elog(ERROR, "Cannot close \"%s\": %s", to_path, strerror(errno)); + + return true; +} + +/* + * Validate given page. + * This function is expected to be executed multiple times, + * so avoid using elog within it. + * lsn from page is assigned to page_lsn pointer. + * TODO: switch to enum for return codes. + */ +int +validate_one_page(Page page, BlockNumber absolute_blkno, + XLogRecPtr stop_lsn, PageState *page_st, + uint32 checksum_version) +{ + page_st->lsn = InvalidXLogRecPtr; + page_st->checksum = 0; + + /* new level of paranoia */ + if (page == NULL) + return PAGE_IS_NOT_FOUND; + + /* check that page header is ok */ + if (!parse_page(page, &(page_st)->lsn)) + { + int i; + /* Check if the page is zeroed. */ + for (i = 0; i < BLCKSZ && page[i] == 0; i++); + + /* Page is zeroed. No need to verify checksums */ + if (i == BLCKSZ) + return PAGE_IS_ZEROED; + + /* Page does not looking good */ + return PAGE_HEADER_IS_INVALID; + } + + /* Verify checksum */ + page_st->checksum = pg_checksum_page(page, absolute_blkno); + + if (checksum_version) + { + /* Checksums are enabled, so check them. */ + if (page_st->checksum != ((PageHeader) page)->pd_checksum) + return PAGE_CHECKSUM_MISMATCH; + } + + /* At this point page header is sane, if checksums are enabled - the`re ok. + * Check that page is not from future. + * Note, this check should be used only by validate command. + */ + if (stop_lsn > 0) + { + /* Get lsn from page header. Ensure that page is from our time. */ + if (page_st->lsn > stop_lsn) + return PAGE_LSN_FROM_FUTURE; + } + + return PAGE_IS_VALID; +} + +/* + * Valiate pages of datafile in PGDATA one by one. + * + * returns true if the file is valid + * also returns true if the file was not found + */ +bool +check_data_file(ConnectionArgs *arguments, pgFile *file, + const char *from_fullpath, uint32 checksum_version) +{ + FILE *in; + BlockNumber blknum = 0; + BlockNumber nblocks = 0; + int page_state; + char curr_page[BLCKSZ]; + bool is_valid = true; + + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) + { + /* + * If file is not found, this is not en error. + * It could have been deleted by concurrent postgres transaction. + */ + if (errno == ENOENT) + { + elog(LOG, "File \"%s\" is not found", from_fullpath); + return true; + } + + elog(WARNING, "Cannot open file \"%s\": %s", + from_fullpath, strerror(errno)); + return false; + } + + if (file->size % BLCKSZ != 0) + elog(WARNING, "File: \"%s\", invalid file size %zu", from_fullpath, file->size); + + /* + * Compute expected number of blocks in the file. + * NOTE This is a normal situation, if the file size has changed + * since the moment we computed it. 
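The return codes of validate_one_page() are consumed in the validation loops further below; as a compact summary of how callers typically interpret them (the helper name page_result_ok is invented for this sketch):

/*
 * Minimal sketch (not patch code): classify a validate_one_page() result
 * into "page is acceptable" vs "file must be reported as invalid".
 */
static bool
page_result_ok(int rc)
{
	switch (rc)
	{
		case PAGE_IS_VALID:
		case PAGE_IS_ZEROED:		/* all-zero pages are legal */
		case PAGE_IS_NOT_FOUND:		/* NULL page, nothing to check */
			return true;
		case PAGE_HEADER_IS_INVALID:
		case PAGE_CHECKSUM_MISMATCH:
		case PAGE_LSN_FROM_FUTURE:	/* only meaningful when stop_lsn > 0 */
		default:
			return false;
	}
}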
+ */ + nblocks = file->size/BLCKSZ; + + for (blknum = 0; blknum < nblocks; blknum++) + { + PageState page_st; + page_state = prepare_page(NULL, file, InvalidXLogRecPtr, + blknum, in, BACKUP_MODE_FULL, + curr_page, false, checksum_version, + 0, NULL, from_fullpath, &page_st); + + if (page_state == PageIsTruncated) + break; + + if (page_state == PageIsCorrupted) + { + /* Page is corrupted, no need to elog about it, + * prepare_page() already done that + */ + is_valid = false; + continue; + } + } + + fclose(in); + return is_valid; +} + +/* Valiate pages of datafile in backup one by one */ +bool +validate_file_pages(pgFile *file, const char *fullpath, XLogRecPtr stop_lsn, + uint32 checksum_version, uint32 backup_version, HeaderMap *hdr_map) +{ size_t read_len = 0; - int errno_tmp; - char buf[BLCKSZ]; - struct stat st; + bool is_valid = true; + FILE *in; pg_crc32 crc; + bool use_crc32c = backup_version <= 20021 || backup_version >= 20025; + BackupPageHeader2 *headers = NULL; + int n_hdr = -1; + off_t cur_pos_in = 0; + + elog(VERBOSE, "Validate relation blocks for file \"%s\"", fullpath); + + /* should not be possible */ + Assert(!(backup_version >= 20400 && file->n_headers <= 0)); + + in = fopen(fullpath, PG_BINARY_R); + if (in == NULL) + elog(ERROR, "Cannot open file \"%s\": %s", + fullpath, strerror(errno)); + + headers = get_data_file_headers(hdr_map, file, backup_version, false); + + if (!headers && file->n_headers > 0) + { + elog(WARNING, "Cannot get page headers for file \"%s\"", fullpath); + return false; + } + + /* calc CRC of backup file */ + INIT_FILE_CRC32(use_crc32c, crc); + + /* read and validate pages one by one */ + while (true) + { + int rc = 0; + size_t len = 0; + DataPage compressed_page; /* used as read buffer */ + int compressed_size = 0; + DataPage page; + BlockNumber blknum = 0; + PageState page_st; + + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during data file validation"); + + /* newer backups have page headers in separate storage */ + if (headers) + { + n_hdr++; + if (n_hdr >= file->n_headers) + break; + + blknum = headers[n_hdr].block; + /* calculate payload size by comparing current and next page positions, + * page header is not included. 
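The payload-size calculation above is worth spelling out. Assuming an 8-byte BackupPageHeader and purely illustrative offsets, it comes down to a difference of adjacent header positions:

/*
 * Illustrative sketch: with the dummy trailing header stored after the
 * last real header, the payload of page i is the distance between adjacent
 * offsets minus the in-file page header.
 * Example (assumed numbers): pos[i] = 100, pos[i+1] = 612,
 * sizeof(BackupPageHeader) = 8  =>  compressed_size = 504.
 */
static int
payload_size(BackupPageHeader2 *headers, int i)
{
	return headers[i + 1].pos - headers[i].pos - sizeof(BackupPageHeader);
}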
+ */ + compressed_size = headers[n_hdr+1].pos - headers[n_hdr].pos - sizeof(BackupPageHeader); + + Assert(compressed_size > 0); + Assert(compressed_size <= BLCKSZ); + + read_len = sizeof(BackupPageHeader) + compressed_size; + + if (cur_pos_in != headers[n_hdr].pos) + { + if (fio_fseek(in, headers[n_hdr].pos) < 0) + elog(ERROR, "Cannot seek block %u of \"%s\": %s", + blknum, fullpath, strerror(errno)); + else + elog(INFO, "Seek to %u", headers[n_hdr].pos); + + cur_pos_in = headers[n_hdr].pos; + } + } + /* old backups rely on header located directly in data file */ + else + { + if (get_page_header(in, fullpath, &(compressed_page).bph, &crc, use_crc32c)) + { + /* Backward compatibility kludge, TODO: remove in 3.0 + * for some reason we padded compressed pages in old versions + */ + blknum = compressed_page.bph.block; + compressed_size = compressed_page.bph.compressed_size; + read_len = MAXALIGN(compressed_size); + } + else + break; + } + + /* backward compatibility kludge TODO: remove in 3.0 */ + if (compressed_size == PageIsTruncated) + { + elog(INFO, "Block %u of \"%s\" is truncated", + blknum, fullpath); + continue; + } + + Assert(compressed_size <= BLCKSZ); + Assert(compressed_size > 0); + + if (headers) + len = fread(&compressed_page, 1, read_len, in); + else + len = fread(compressed_page.data, 1, read_len, in); + + if (len != read_len) + { + elog(WARNING, "Cannot read block %u file \"%s\": %s", + blknum, fullpath, strerror(errno)); + return false; + } + + /* update current position */ + cur_pos_in += read_len; + + if (headers) + COMP_FILE_CRC32(use_crc32c, crc, &compressed_page, read_len); + else + COMP_FILE_CRC32(use_crc32c, crc, compressed_page.data, read_len); + + if (compressed_size != BLCKSZ + || page_may_be_compressed(compressed_page.data, file->compress_alg, + backup_version)) + { + int32 uncompressed_size = 0; + const char *errormsg = NULL; + + uncompressed_size = do_decompress(page.data, BLCKSZ, + compressed_page.data, + compressed_size, + file->compress_alg, + &errormsg); + if (uncompressed_size < 0 && errormsg != NULL) + { + elog(WARNING, "An error occured during decompressing block %u of file \"%s\": %s", + blknum, fullpath, errormsg); + return false; + } + + if (uncompressed_size != BLCKSZ) + { + if (compressed_size == BLCKSZ) + { + is_valid = false; + continue; + } + elog(WARNING, "Page %u of file \"%s\" uncompressed to %d bytes. != BLCKSZ", + blknum, fullpath, uncompressed_size); + return false; + } + + rc = validate_one_page(page.data, + file->segno * RELSEG_SIZE + blknum, + stop_lsn, &page_st, checksum_version); + } + else + rc = validate_one_page(compressed_page.data, + file->segno * RELSEG_SIZE + blknum, + stop_lsn, &page_st, checksum_version); + + switch (rc) + { + case PAGE_IS_NOT_FOUND: + elog(LOG, "File \"%s\", block %u, page is NULL", file->rel_path, blknum); + break; + case PAGE_IS_ZEROED: + elog(LOG, "File: %s blknum %u, empty zeroed page", file->rel_path, blknum); + break; + case PAGE_HEADER_IS_INVALID: + elog(WARNING, "Page header is looking insane: %s, block %i", file->rel_path, blknum); + is_valid = false; + break; + case PAGE_CHECKSUM_MISMATCH: + elog(WARNING, "File: %s blknum %u have wrong checksum: %u", file->rel_path, blknum, page_st.checksum); + is_valid = false; + break; + case PAGE_LSN_FROM_FUTURE: + elog(WARNING, "File: %s, block %u, checksum is %s. " + "Page is from future: pageLSN %X/%X stopLSN %X/%X", + file->rel_path, blknum, + checksum_version ? 
"correct" : "not enabled", + (uint32) (page_st.lsn >> 32), (uint32) page_st.lsn, + (uint32) (stop_lsn >> 32), (uint32) stop_lsn); + break; + } + } + + FIN_FILE_CRC32(use_crc32c, crc); + fclose(in); + + if (crc != file->crc) + { + elog(WARNING, "Invalid CRC of backup file \"%s\": %X. Expected %X", + fullpath, crc, file->crc); + is_valid = false; + } + + pg_free(headers); + + return is_valid; +} + +/* read local data file and construct map with block checksums */ +PageState* +get_checksum_map(const char *fullpath, uint32 checksum_version, + int n_blocks, XLogRecPtr dest_stop_lsn, BlockNumber segmentno) +{ + PageState *checksum_map = NULL; + FILE *in = NULL; + BlockNumber blknum = 0; + char read_buffer[BLCKSZ]; + char in_buf[STDIO_BUFSIZE]; + + /* open file */ + in = fopen(fullpath, "r+b"); + if (!in) + elog(ERROR, "Cannot open source file \"%s\": %s", fullpath, strerror(errno)); + + /* truncate up to blocks */ + if (ftruncate(fileno(in), n_blocks * BLCKSZ) != 0) + elog(ERROR, "Cannot truncate file to blknum %u \"%s\": %s", + n_blocks, fullpath, strerror(errno)); + + setvbuf(in, in_buf, _IOFBF, STDIO_BUFSIZE); + + /* initialize array of checksums */ + checksum_map = pgut_malloc(n_blocks * sizeof(PageState)); + memset(checksum_map, 0, n_blocks * sizeof(PageState)); + + for (blknum = 0; blknum < n_blocks; blknum++) + { + size_t read_len = fread(read_buffer, 1, BLCKSZ, in); + PageState page_st; + + /* report error */ + if (ferror(in)) + elog(ERROR, "Cannot read block %u of \"%s\": %s", + blknum, fullpath, strerror(errno)); + + if (read_len == BLCKSZ) + { + int rc = validate_one_page(read_buffer, segmentno + blknum, + dest_stop_lsn, &page_st, + checksum_version); + + if (rc == PAGE_IS_VALID) + { +// if (checksum_version) +// checksum_map[blknum].checksum = ((PageHeader) read_buffer)->pd_checksum; +// else +// checksum_map[blknum].checksum = page_st.checksum; + checksum_map[blknum].checksum = page_st.checksum; + checksum_map[blknum].lsn = page_st.lsn; + } + } + else + elog(ERROR, "Failed to read blknum %u from file \"%s\"", blknum, fullpath); + + if (feof(in)) + break; - INIT_CRC32C(crc); + if (interrupted) + elog(ERROR, "Interrupted during page reading"); + } - /* reset size summary */ - file->read_size = 0; - file->write_size = 0; + if (in) + fclose(in); - /* open backup mode file for read */ - in = fopen(file->path, PG_BINARY_R); - if (in == NULL) - { - FIN_CRC32C(crc); - file->crc = crc; + return checksum_map; +} - /* maybe deleted, it's not error */ - if (errno == ENOENT) - return false; +/* return bitmap of valid blocks, bitmap is empty, then NULL is returned */ +datapagemap_t * +get_lsn_map(const char *fullpath, uint32 checksum_version, + int n_blocks, XLogRecPtr shift_lsn, BlockNumber segmentno) +{ + FILE *in = NULL; + BlockNumber blknum = 0; + char read_buffer[BLCKSZ]; + char in_buf[STDIO_BUFSIZE]; + datapagemap_t *lsn_map = NULL; - elog(ERROR, "cannot open source file \"%s\": %s", file->path, - strerror(errno)); - } + Assert(shift_lsn > 0); - /* open backup file for write */ - join_path_components(to_path, to_root, file->path + strlen(from_root) + 1); - out = fopen(to_path, PG_BINARY_W); - if (out == NULL) - { - int errno_tmp = errno; - fclose(in); - elog(ERROR, "cannot open destination file \"%s\": %s", - to_path, strerror(errno_tmp)); - } + /* open file */ + in = fopen(fullpath, "r+b"); + if (!in) + elog(ERROR, "Cannot open source file \"%s\": %s", fullpath, strerror(errno)); - /* stat source file to change mode of destination file */ - if (fstat(fileno(in), &st) == -1) - { - 
fclose(in); - fclose(out); - elog(ERROR, "cannot stat \"%s\": %s", file->path, - strerror(errno)); - } + /* truncate up to blocks */ + if (ftruncate(fileno(in), n_blocks * BLCKSZ) != 0) + elog(ERROR, "Cannot truncate file to blknum %u \"%s\": %s", + n_blocks, fullpath, strerror(errno)); - /* copy content and calc CRC */ - for (;;) + setvbuf(in, in_buf, _IOFBF, STDIO_BUFSIZE); + + lsn_map = pgut_malloc(sizeof(datapagemap_t)); + memset(lsn_map, 0, sizeof(datapagemap_t)); + + for (blknum = 0; blknum < n_blocks; blknum++) { - read_len = 0; + size_t read_len = fread(read_buffer, 1, BLCKSZ, in); + PageState page_st; - if ((read_len = fread(buf, 1, sizeof(buf), in)) != sizeof(buf)) - break; + /* report error */ + if (ferror(in)) + elog(ERROR, "Cannot read block %u of \"%s\": %s", + blknum, fullpath, strerror(errno)); - if (fwrite(buf, 1, read_len, out) != read_len) + if (read_len == BLCKSZ) { - errno_tmp = errno; - /* oops */ - fclose(in); - fclose(out); - elog(ERROR, "cannot write to \"%s\": %s", to_path, - strerror(errno_tmp)); + int rc = validate_one_page(read_buffer, segmentno + blknum, + shift_lsn, &page_st, checksum_version); + + if (rc == PAGE_IS_VALID) + datapagemap_add(lsn_map, blknum); } - /* update CRC */ - COMP_CRC32C(crc, buf, read_len); + else + elog(ERROR, "Cannot read block %u from file \"%s\": %s", + blknum, fullpath, strerror(errno)); + + if (feof(in)) + break; - file->read_size += read_len; + if (interrupted) + elog(ERROR, "Interrupted during page reading"); } - errno_tmp = errno; - if (!feof(in)) - { + if (in) fclose(in); - fclose(out); - elog(ERROR, "cannot read backup mode file \"%s\": %s", - file->path, strerror(errno_tmp)); - } - /* copy odd part. */ - if (read_len > 0) + if (lsn_map->bitmapsize == 0) { - if (fwrite(buf, 1, read_len, out) != read_len) - { - errno_tmp = errno; - /* oops */ - fclose(in); - fclose(out); - elog(ERROR, "cannot write to \"%s\": %s", to_path, - strerror(errno_tmp)); - } - /* update CRC */ - COMP_CRC32C(crc, buf, read_len); - - file->read_size += read_len; + pg_free(lsn_map); + lsn_map = NULL; } - file->write_size = (int64) file->read_size; - /* finish CRC calculation and store into pgFile */ - FIN_CRC32C(crc); - file->crc = crc; + return lsn_map; +} - /* update file permission */ - if (chmod(to_path, st.st_mode) == -1) +/* Every page in data file contains BackupPageHeader, extract it */ +bool +get_page_header(FILE *in, const char *fullpath, BackupPageHeader* bph, + pg_crc32 *crc, bool use_crc32c) +{ + /* read BackupPageHeader */ + size_t read_len = fread(bph, 1, sizeof(BackupPageHeader), in); + + if (ferror(in)) + elog(ERROR, "Cannot read file \"%s\": %s", + fullpath, strerror(errno)); + + if (read_len != sizeof(BackupPageHeader)) { - errno_tmp = errno; - fclose(in); - fclose(out); - elog(ERROR, "cannot change mode of \"%s\": %s", to_path, - strerror(errno_tmp)); + if (read_len == 0 && feof(in)) + return false; /* EOF found */ + else if (read_len != 0 && feof(in)) + elog(ERROR, + "Odd size page found at offset %lu of \"%s\"", + ftell(in), fullpath); + else + elog(ERROR, "Cannot read header at offset %lu of \"%s\": %s", + ftell(in), fullpath, strerror(errno)); } - if (fflush(out) != 0 || - fsync(fileno(out)) != 0 || - fclose(out)) - elog(ERROR, "cannot write \"%s\": %s", to_path, strerror(errno)); - fclose(in); + /* In older versions < 2.4.0, when crc for file was calculated, header was + * not included in crc calculations. Now it is. 
And now we have + * the problem of backward compatibility for backups of old versions + */ + if (crc) + COMP_FILE_CRC32(use_crc32c, *crc, bph, read_len); + if (bph->block == 0 && bph->compressed_size == 0) + elog(ERROR, "Empty block in file \"%s\"", fullpath); + + Assert(bph->compressed_size != 0); return true; } -/* - * Move file from one backup to another. - * We do not apply compression to these files, because - * it is either small control file or already compressed cfs file. - */ -void -move_file(const char *from_root, const char *to_root, pgFile *file) +/* Open local backup file for writing, set permissions and buffering */ +FILE* +open_local_file_rw(const char *to_fullpath, char **out_buf, uint32 buf_size) { - char to_path[MAXPGPATH]; + FILE *out = NULL; + /* open backup file for write */ + out = fopen(to_fullpath, PG_BINARY_W); + if (out == NULL) + elog(ERROR, "Cannot open backup file \"%s\": %s", + to_fullpath, strerror(errno)); - join_path_components(to_path, to_root, file->path + strlen(from_root) + 1); - if (rename(file->path, to_path) == -1) - elog(ERROR, "Cannot move file \"%s\" to path \"%s\": %s", - file->path, to_path, strerror(errno)); -} + /* update file permission */ + if (chmod(to_fullpath, FILE_PERMISSION) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, + strerror(errno)); -#ifdef HAVE_LIBZ -/* - * Show error during work with compressed file - */ -static const char * -get_gz_error(gzFile gzf, int errnum) -{ - int gz_errnum; - const char *errmsg; + /* enable stdio buffering for output file */ + *out_buf = pgut_malloc(buf_size); + setvbuf(out, *out_buf, _IOFBF, buf_size); - errmsg = gzerror(gzf, &gz_errnum); - if (gz_errnum == Z_ERRNO) - return strerror(errnum); - else - return errmsg; + return out; } -#endif -/* - * Copy file attributes - */ -static void -copy_meta(const char *from_path, const char *to_path, bool unlink_on_error) +/* backup local file */ +int +send_pages(ConnectionArgs* conn_arg, const char *to_fullpath, const char *from_fullpath, + pgFile *file, XLogRecPtr prev_backup_start_lsn, CompressAlg calg, int clevel, + uint32 checksum_version, bool use_pagemap, BackupPageHeader2 **headers, + BackupMode backup_mode, int ptrack_version_num, const char *ptrack_schema) { - struct stat st; - - if (stat(from_path, &st) == -1) + FILE *in = NULL; + FILE *out = NULL; + int hdr_num = -1; + off_t cur_pos_out = 0; + char curr_page[BLCKSZ]; + int n_blocks_read = 0; + BlockNumber blknum = 0; + datapagemap_iterator_t *iter = NULL; + int compressed_size = 0; + + /* stdio buffers */ + char *in_buf = NULL; + char *out_buf = NULL; + + /* open source file for read */ + in = fopen(from_fullpath, PG_BINARY_R); + if (in == NULL) { - if (unlink_on_error) - unlink(to_path); - elog(ERROR, "Cannot stat file \"%s\": %s", - from_path, strerror(errno)); - } + /* + * If file is not found, this is not en error. + * It could have been deleted by concurrent postgres transaction. + */ + if (errno == ENOENT) + return FILE_MISSING; - if (chmod(to_path, st.st_mode) == -1) - { - if (unlink_on_error) - unlink(to_path); - elog(ERROR, "Cannot change mode of file \"%s\": %s", - to_path, strerror(errno)); + elog(ERROR, "Cannot open file \"%s\": %s", from_fullpath, strerror(errno)); } -} - -/* - * Copy WAL segment from pgdata to archive catalog with possible compression. 
- */ -void -push_wal_file(const char *from_path, const char *to_path, bool is_compress, - bool overwrite) -{ - FILE *in = NULL; - FILE *out=NULL; - char buf[XLOG_BLCKSZ]; - const char *to_path_p = to_path; - char to_path_temp[MAXPGPATH]; - int errno_temp; -#ifdef HAVE_LIBZ - char gz_to_path[MAXPGPATH]; - gzFile gz_out = NULL; -#endif - - /* open file for read */ - in = fopen(from_path, PG_BINARY_R); - if (in == NULL) - elog(ERROR, "Cannot open source WAL file \"%s\": %s", from_path, - strerror(errno)); + /* + * Enable stdio buffering for local input file, + * unless the pagemap is involved, which + * imply a lot of random access. + */ - /* open backup file for write */ -#ifdef HAVE_LIBZ - if (is_compress) + if (use_pagemap) { - snprintf(gz_to_path, sizeof(gz_to_path), "%s.gz", to_path); - - if (!overwrite && fileExists(gz_to_path)) - elog(ERROR, "WAL segment \"%s\" already exists.", gz_to_path); - - snprintf(to_path_temp, sizeof(to_path_temp), "%s.partial", gz_to_path); - - gz_out = gzopen(to_path_temp, PG_BINARY_W); - if (gzsetparams(gz_out, compress_level, Z_DEFAULT_STRATEGY) != Z_OK) - elog(ERROR, "Cannot set compression level %d to file \"%s\": %s", - compress_level, to_path_temp, get_gz_error(gz_out, errno)); + iter = datapagemap_iterate(&file->pagemap); + datapagemap_next(iter, &blknum); /* set first block */ - to_path_p = gz_to_path; + setvbuf(in, NULL, _IONBF, BUFSIZ); } else -#endif { - if (!overwrite && fileExists(to_path)) - elog(ERROR, "WAL segment \"%s\" already exists.", to_path); - - snprintf(to_path_temp, sizeof(to_path_temp), "%s.partial", to_path); - - out = fopen(to_path_temp, PG_BINARY_W); - if (out == NULL) - elog(ERROR, "Cannot open destination WAL file \"%s\": %s", - to_path_temp, strerror(errno)); + in_buf = pgut_malloc(STDIO_BUFSIZE); + setvbuf(in, in_buf, _IOFBF, STDIO_BUFSIZE); } - /* copy content */ - for (;;) + while (blknum < file->n_blocks) { - size_t read_len = 0; - - read_len = fread(buf, 1, sizeof(buf), in); + PageState page_st; + int rc = prepare_page(conn_arg, file, prev_backup_start_lsn, + blknum, in, backup_mode, curr_page, + true, checksum_version, + ptrack_version_num, ptrack_schema, + from_fullpath, &page_st); + if (rc == PageIsTruncated) + break; - if (ferror(in)) + else if (rc == PageIsOk) { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, - "Cannot read source WAL file \"%s\": %s", - from_path, strerror(errno_temp)); - } + /* lazily open backup file (useful for s3) */ + if (!out) + out = open_local_file_rw(to_fullpath, &out_buf, STDIO_BUFSIZE); - if (read_len > 0) - { -#ifdef HAVE_LIBZ - if (is_compress) - { - if (gzwrite(gz_out, buf, read_len) != read_len) - { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot write to compressed WAL file \"%s\": %s", - to_path_temp, get_gz_error(gz_out, errno_temp)); - } - } + hdr_num++; + + if (!*headers) + *headers = (BackupPageHeader2 *) pgut_malloc(sizeof(BackupPageHeader2)); else -#endif - { - if (fwrite(buf, 1, read_len, out) != read_len) - { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot write to WAL file \"%s\": %s", - to_path_temp, strerror(errno_temp)); - } - } - } + *headers = (BackupPageHeader2 *) pgut_realloc(*headers, (hdr_num+1) * sizeof(BackupPageHeader2)); - if (feof(in) || read_len == 0) - break; - } + (*headers)[hdr_num].block = blknum; + (*headers)[hdr_num].pos = cur_pos_out; + (*headers)[hdr_num].lsn = page_st.lsn; + (*headers)[hdr_num].checksum = page_st.checksum; -#ifdef HAVE_LIBZ - if (is_compress) - { - if (gzclose(gz_out) != 0) - { - 
errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot close compressed WAL file \"%s\": %s", - to_path_temp, get_gz_error(gz_out, errno_temp)); + compressed_size = compress_and_backup_page(file, blknum, in, out, &(file->crc), + rc, curr_page, calg, clevel, + from_fullpath, to_fullpath); + cur_pos_out += compressed_size + sizeof(BackupPageHeader); } - } - else -#endif - { - if (fflush(out) != 0 || - fsync(fileno(out)) != 0 || - fclose(out)) + + n_blocks_read++; + + /* next block */ + if (use_pagemap) { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot write WAL file \"%s\": %s", - to_path_temp, strerror(errno_temp)); + /* exit if pagemap is exhausted */ + if (!datapagemap_next(iter, &blknum)) + break; } + else + blknum++; } - if (fclose(in)) + /* + * Add dummy header, so we can later extract the length of last header + * as difference between their offsets. + */ + if (*headers) { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot close source WAL file \"%s\": %s", - from_path, strerror(errno_temp)); + file->n_headers = hdr_num +1; + *headers = (BackupPageHeader2 *) pgut_realloc(*headers, (hdr_num+2) * sizeof(BackupPageHeader2)); + (*headers)[hdr_num+1].pos = cur_pos_out; } - /* update file permission. */ - copy_meta(from_path, to_path_temp, true); + /* cleanup */ + if (in && fclose(in)) + elog(ERROR, "Cannot close the source file \"%s\": %s", + to_fullpath, strerror(errno)); - if (rename(to_path_temp, to_path_p) < 0) - { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot rename WAL file \"%s\" to \"%s\": %s", - to_path_temp, to_path_p, strerror(errno_temp)); - } + /* close local output file */ + if (out && fclose(out)) + elog(ERROR, "Cannot close the backup file \"%s\": %s", + to_fullpath, strerror(errno)); -#ifdef HAVE_LIBZ - if (is_compress) - elog(INFO, "WAL file compressed to \"%s\"", gz_to_path); -#endif + pg_free(iter); + pg_free(in_buf); + pg_free(out_buf); + + return n_blocks_read; } /* - * Copy WAL segment from archive catalog to pgdata with possible decompression. + * Attempt to open header file, read content and return as + * array of headers. + * TODO: some access optimizations would be great here: + * less fseeks, buffering, descriptor sharing, etc. */ -void -get_wal_file(const char *from_path, const char *to_path) +BackupPageHeader2* +get_data_file_headers(HeaderMap *hdr_map, pgFile *file, uint32 backup_version, bool strict) { - FILE *in = NULL; - FILE *out; - char buf[XLOG_BLCKSZ]; - const char *from_path_p = from_path; - char to_path_temp[MAXPGPATH]; - int errno_temp; - bool is_decompress = false; - -#ifdef HAVE_LIBZ - char gz_from_path[MAXPGPATH]; - gzFile gz_in = NULL; -#endif + bool success = false; + FILE *in = NULL; + size_t read_len = 0; + pg_crc32 hdr_crc; + BackupPageHeader2 *headers = NULL; + /* header decompression */ + int z_len = 0; + char *zheaders = NULL; + const char *errormsg = NULL; + + if (backup_version < 20400) + return NULL; + + if (file->n_headers <= 0) + return NULL; + + /* TODO: consider to make this descriptor thread-specific */ + in = fopen(hdr_map->path, PG_BINARY_R); + + if (!in) + { + elog(strict ? ERROR : WARNING, "Cannot open header file \"%s\": %s", hdr_map->path, strerror(errno)); + return NULL; + } + /* disable buffering for header file */ + setvbuf(in, NULL, _IONBF, BUFSIZ); - /* open file for read */ - in = fopen(from_path, PG_BINARY_R); - if (in == NULL) + if (fseek(in, file->hdr_off, SEEK_SET)) { -#ifdef HAVE_LIBZ - /* - * Maybe we need to decompress the file. 
Check it with .gz - * extension. - */ - snprintf(gz_from_path, sizeof(gz_from_path), "%s.gz", from_path); - gz_in = gzopen(gz_from_path, PG_BINARY_R); - if (gz_in == NULL) - { - if (errno == ENOENT) - { - /* There is no compressed file too, raise an error below */ - } - /* Cannot open compressed file for some reason */ - else - elog(ERROR, "Cannot open compressed WAL file \"%s\": %s", - gz_from_path, strerror(errno)); - } - else - { - /* Found compressed file */ - is_decompress = true; - from_path_p = gz_from_path; - } -#endif - /* Didn't find compressed file */ - if (!is_decompress) - elog(ERROR, "Cannot open source WAL file \"%s\": %s", - from_path, strerror(errno)); + elog(strict ? ERROR : WARNING, "Cannot seek to position %lu in page header map \"%s\": %s", + file->hdr_off, hdr_map->path, strerror(errno)); + goto cleanup; } - /* open backup file for write */ - snprintf(to_path_temp, sizeof(to_path_temp), "%s.partial", to_path); + /* + * The actual number of headers in header file is n+1, last one is a dummy header, + * used for calculation of read_len for actual last header. + */ + read_len = (file->n_headers+1) * sizeof(BackupPageHeader2); - out = fopen(to_path_temp, PG_BINARY_W); - if (out == NULL) - elog(ERROR, "Cannot open destination WAL file \"%s\": %s", - to_path_temp, strerror(errno)); + /* allocate memory for compressed headers */ + zheaders = pgut_malloc(file->hdr_size); + memset(zheaders, 0, file->hdr_size); - /* copy content */ - for (;;) + if (fread(zheaders, 1, file->hdr_size, in) != file->hdr_size) { - size_t read_len = 0; - -#ifdef HAVE_LIBZ - if (is_decompress) - { - read_len = gzread(gz_in, buf, sizeof(buf)); - if (read_len != sizeof(buf) && !gzeof(gz_in)) - { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot read compressed WAL file \"%s\": %s", - gz_from_path, get_gz_error(gz_in, errno_temp)); - } - } - else -#endif - { - read_len = fread(buf, 1, sizeof(buf), in); - if (ferror(in)) - { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot read source WAL file \"%s\": %s", - from_path, strerror(errno_temp)); - } - } + elog(strict ? ERROR : WARNING, "Cannot read header file at offset: %li len: %i \"%s\": %s", + file->hdr_off, file->hdr_size, hdr_map->path, strerror(errno)); + goto cleanup; + } - if (read_len > 0) - { - if (fwrite(buf, 1, read_len, out) != read_len) - { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot write to WAL file \"%s\": %s", to_path_temp, - strerror(errno_temp)); - } - } + /* allocate memory for uncompressed headers */ + headers = pgut_malloc(read_len); + memset(headers, 0, read_len); - /* Check for EOF */ -#ifdef HAVE_LIBZ - if (is_decompress) - { - if (gzeof(gz_in) || read_len == 0) - break; - } + z_len = do_decompress(headers, read_len, zheaders, file->hdr_size, + ZLIB_COMPRESS, &errormsg); + if (z_len <= 0) + { + if (errormsg) + elog(strict ? ERROR : WARNING, "An error occured during metadata decompression for file \"%s\": %s", + file->rel_path, errormsg); else -#endif - { - if (feof(in) || read_len == 0) - break; - } - } + elog(strict ? 
ERROR : WARNING, "An error occured during metadata decompression for file \"%s\": %i", + file->rel_path, z_len); - if (fflush(out) != 0 || - fsync(fileno(out)) != 0 || - fclose(out)) - { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot write WAL file \"%s\": %s", - to_path_temp, strerror(errno_temp)); + goto cleanup; } -#ifdef HAVE_LIBZ - if (is_decompress) - { - if (gzclose(gz_in) != 0) - { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot close compressed WAL file \"%s\": %s", - gz_from_path, get_gz_error(gz_in, errno_temp)); - } - } - else -#endif + /* validate checksum */ + INIT_FILE_CRC32(true, hdr_crc); + COMP_FILE_CRC32(true, hdr_crc, headers, read_len); + FIN_FILE_CRC32(true, hdr_crc); + + if (hdr_crc != file->hdr_crc) { - if (fclose(in)) - { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot close source WAL file \"%s\": %s", - from_path, strerror(errno_temp)); - } + elog(strict ? ERROR : WARNING, "Header map for file \"%s\" crc mismatch \"%s\" " + "offset: %lu, len: %lu, current: %u, expected: %u", + file->rel_path, hdr_map->path, file->hdr_off, read_len, hdr_crc, file->hdr_crc); + goto cleanup; } - /* update file permission. */ - copy_meta(from_path_p, to_path_temp, true); + success = true; + +cleanup: - if (rename(to_path_temp, to_path) < 0) + pg_free(zheaders); + if (in && fclose(in)) + elog(ERROR, "Cannot close file \"%s\"", hdr_map->path); + + if (!success) { - errno_temp = errno; - unlink(to_path_temp); - elog(ERROR, "Cannot rename WAL file \"%s\" to \"%s\": %s", - to_path_temp, to_path, strerror(errno_temp)); + pg_free(headers); + headers = NULL; } -#ifdef HAVE_LIBZ - if (is_decompress) - elog(INFO, "WAL file decompressed from \"%s\"", gz_from_path); -#endif + return headers; } -/* - * Calculate checksum of various files which are not copied from PGDATA, - * but created in process of backup, such as stream XLOG files, - * PG_TABLESPACE_MAP_FILE and PG_BACKUP_LABEL_FILE. - */ -bool -calc_file_checksum(pgFile *file) +/* write headers of all blocks belonging to file to header map and + * save its offset and size */ +void +write_page_headers(BackupPageHeader2 *headers, pgFile *file, HeaderMap *hdr_map, bool is_merge) { - FILE *in; - size_t read_len = 0; - int errno_tmp; - char buf[BLCKSZ]; - struct stat st; - pg_crc32 crc; + size_t read_len = 0; + char *map_path = NULL; + /* header compression */ + int z_len = 0; + char *zheaders = NULL; + const char *errormsg = NULL; + + if (file->n_headers <= 0) + return; - Assert(S_ISREG(file->mode)); - INIT_CRC32C(crc); + /* when running merge we must write headers into temp map */ + map_path = (is_merge) ? 
hdr_map->path_tmp : hdr_map->path; + read_len = (file->n_headers+1) * sizeof(BackupPageHeader2); - /* reset size summary */ - file->read_size = 0; - file->write_size = 0; + /* calculate checksums */ + INIT_FILE_CRC32(true, file->hdr_crc); + COMP_FILE_CRC32(true, file->hdr_crc, headers, read_len); + FIN_FILE_CRC32(true, file->hdr_crc); - /* open backup mode file for read */ - in = fopen(file->path, PG_BINARY_R); - if (in == NULL) - { - FIN_CRC32C(crc); - file->crc = crc; + zheaders = pgut_malloc(read_len*2); + memset(zheaders, 0, read_len*2); - /* maybe deleted, it's not error */ - if (errno == ENOENT) - return false; + /* compress headers */ + z_len = do_compress(zheaders, read_len*2, headers, + read_len, ZLIB_COMPRESS, 1, &errormsg); - elog(ERROR, "cannot open source file \"%s\": %s", file->path, - strerror(errno)); - } + /* writing to header map must be serialized */ + pthread_lock(&(hdr_map->mutex)); /* what if we crash while trying to obtain mutex? */ - /* stat source file to change mode of destination file */ - if (fstat(fileno(in), &st) == -1) + if (!hdr_map->fp) { - fclose(in); - elog(ERROR, "cannot stat \"%s\": %s", file->path, - strerror(errno)); - } + elog(LOG, "Creating page header map \"%s\"", map_path); - for (;;) - { - read_len = fread(buf, 1, sizeof(buf), in); + hdr_map->fp = fopen(map_path, PG_BINARY_W); + if (hdr_map->fp == NULL) + elog(ERROR, "Cannot open header file \"%s\": %s", + map_path, strerror(errno)); - if(read_len == 0) - break; + /* enable buffering for header file */ + hdr_map->buf = pgut_malloc(LARGE_CHUNK_SIZE); + setvbuf(hdr_map->fp, hdr_map->buf, _IOFBF, LARGE_CHUNK_SIZE); - /* update CRC */ - COMP_CRC32C(crc, buf, read_len); + /* update file permission */ + if (chmod(map_path, FILE_PERMISSION) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", map_path, + strerror(errno)); - file->write_size += read_len; - file->read_size += read_len; + file->hdr_off = 0; } + else + file->hdr_off = hdr_map->offset; - errno_tmp = errno; - if (!feof(in)) + if (z_len <= 0) { - fclose(in); - elog(ERROR, "cannot read backup mode file \"%s\": %s", - file->path, strerror(errno_tmp)); + if (errormsg) + elog(ERROR, "An error occured during compressing metadata for file \"%s\": %s", + file->rel_path, errormsg); + else + elog(ERROR, "An error occured during compressing metadata for file \"%s\": %i", + file->rel_path, z_len); } - /* finish CRC calculation and store into pgFile */ - FIN_CRC32C(crc); - file->crc = crc; + elog(VERBOSE, "Writing headers for file \"%s\" offset: %li, len: %i, crc: %u", + file->rel_path, file->hdr_off, z_len, file->hdr_crc); - fclose(in); + if (fwrite(zheaders, 1, z_len, hdr_map->fp) != z_len) + elog(ERROR, "Cannot write to file \"%s\": %s", map_path, strerror(errno)); - return true; + file->hdr_size = z_len; /* save the length of compressed headers */ + hdr_map->offset += z_len; /* update current offset in map */ + + /* End critical section */ + pthread_mutex_unlock(&(hdr_map->mutex)); + + pg_free(zheaders); +} + +void +init_header_map(pgBackup *backup) +{ + backup->hdr_map.fp = NULL; + backup->hdr_map.buf = NULL; + join_path_components(backup->hdr_map.path, backup->root_dir, HEADER_MAP); + join_path_components(backup->hdr_map.path_tmp, backup->root_dir, HEADER_MAP_TMP); + backup->hdr_map.mutex = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; +} + +void +cleanup_header_map(HeaderMap *hdr_map) +{ + /* cleanup descriptor */ + if (hdr_map->fp && fclose(hdr_map->fp)) + elog(ERROR, "Cannot close file \"%s\"", hdr_map->path); + hdr_map->fp = NULL; + 
hdr_map->offset = 0; + pg_free(hdr_map->buf); + hdr_map->buf = NULL; } diff --git a/src/delete.c b/src/delete.c index 0829e7255..b3c50a4b9 100644 --- a/src/delete.c +++ b/src/delete.c @@ -3,7 +3,7 @@ * delete.c: delete backup files. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ @@ -14,67 +14,82 @@ #include #include -static int pgBackupDeleteFiles(pgBackup *backup); -static void delete_walfiles(XLogRecPtr oldest_lsn, TimeLineID oldest_tli); - -int +static void delete_walfiles_in_tli(XLogRecPtr keep_lsn, timelineInfo *tli, + uint32 xlog_seg_size, bool dry_run); +static void do_retention_internal(parray *backup_list, parray *to_keep_list, + parray *to_purge_list); +static void do_retention_merge(parray *backup_list, parray *to_keep_list, + parray *to_purge_list); +static void do_retention_purge(parray *to_keep_list, parray *to_purge_list); +static void do_retention_wal(bool dry_run); + +// TODO: more useful messages for dry run. +static bool backup_deleted = false; /* At least one backup was deleted */ +static bool backup_merged = false; /* At least one merge was enacted */ +static bool wal_deleted = false; /* At least one WAL segments was deleted */ + +void do_delete(time_t backup_id) { int i; parray *backup_list, *delete_list; - time_t parent_id = 0; - bool backup_found = false; - XLogRecPtr oldest_lsn = InvalidXLogRecPtr; - TimeLineID oldest_tli = 0; - - /* Get exclusive lock of backup catalog */ - catalog_lock(); + pgBackup *target_backup = NULL; + size_t size_to_delete = 0; + char size_to_delete_pretty[20]; /* Get complete list of backups */ - backup_list = catalog_get_backup_list(INVALID_BACKUP_ID); + backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); - if (backup_id != 0) + delete_list = parray_new(); + + /* Find backup to be deleted and make increment backups array to be deleted */ + for (i = 0; i < parray_num(backup_list); i++) { - delete_list = parray_new(); + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); - /* Find backup to be deleted and make increment backups array to be deleted */ - for (i = (int) parray_num(backup_list) - 1; i >= 0; i--) + if (backup->start_time == backup_id) { - pgBackup *backup = (pgBackup *) parray_get(backup_list, (size_t) i); + target_backup = backup; + break; + } + } - if (backup->start_time == backup_id) - { - parray_append(delete_list, backup); - - /* - * Do not remove next backups, if target backup was finished - * incorrectly. 
- */ - if (backup->status == BACKUP_STATUS_ERROR) - break; - - /* Save backup id to retreive increment backups */ - parent_id = backup->start_time; - backup_found = true; - } - else if (backup_found) - { - if (backup->backup_mode != BACKUP_MODE_FULL && - backup->parent_backup == parent_id) - { - /* Append to delete list increment backup */ - parray_append(delete_list, backup); - /* Save backup id to retreive increment backups */ - parent_id = backup->start_time; - } - else - break; - } + /* sanity */ + if (!target_backup) + elog(ERROR, "Failed to find backup %s, cannot delete", base36enc(backup_id)); + + /* form delete list */ + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* check if backup is descendant of delete target */ + if (is_parent(target_backup->start_time, backup, true)) + { + parray_append(delete_list, backup); + + elog(LOG, "Backup %s %s be deleted", + base36enc(backup->start_time), dry_run? "can":"will"); + + size_to_delete += backup->data_bytes; + if (backup->stream) + size_to_delete += backup->wal_bytes; } + } + + /* Report the resident size to delete */ + if (size_to_delete >= 0) + { + pretty_size(size_to_delete, size_to_delete_pretty, lengthof(size_to_delete_pretty)); + elog(INFO, "Resident data size to free by delete of backup %s : %s", + base36enc(target_backup->start_time), size_to_delete_pretty); + } - if (parray_num(delete_list) == 0) - elog(ERROR, "no backup found, cannot delete"); + if (!dry_run) + { + /* Lock marked for delete backups */ + catalog_lock_backup_list(delete_list, parray_num(delete_list) - 1, 0, false); /* Delete backups from the end of list */ for (i = (int) parray_num(delete_list) - 1; i >= 0; i--) @@ -84,333 +99,864 @@ do_delete(time_t backup_id) if (interrupted) elog(ERROR, "interrupted during delete backup"); - pgBackupDeleteFiles(backup); + delete_backup_files(backup); } - - parray_free(delete_list); } /* Clean WAL segments */ if (delete_wal) - { - /* Find oldest LSN, used by backups */ - for (i = (int) parray_num(backup_list) - 1; i >= 0; i--) - { - pgBackup *backup = (pgBackup *) parray_get(backup_list, (size_t) i); - - if (backup->status == BACKUP_STATUS_OK) - { - oldest_lsn = backup->start_lsn; - oldest_tli = backup->tli; - break; - } - } - - delete_walfiles(oldest_lsn, oldest_tli); - } + do_retention_wal(dry_run); /* cleanup */ + parray_free(delete_list); parray_walk(backup_list, pgBackupFree); parray_free(backup_list); - - return 0; } /* - * Remove backups by retention policy. Retention policy is configured by + * Merge and purge backups by retention policy. Retention policy is configured by * retention_redundancy and retention_window variables. + * + * Invalid backups handled in Oracle style, so invalid backups are ignored + * for the purpose of retention fulfillment, + * i.e. 
CORRUPT full backup do not taken in account when determine + * which FULL backup should be keeped for redundancy obligation(only valid do), + * but if invalid backup is not guarded by retention - it is removed */ -int -do_retention_purge(void) +void do_retention(void) { - parray *backup_list; - uint32 backup_num; - size_t i; - time_t days_threshold = time(NULL) - (retention_window * 60 * 60 * 24); - XLogRecPtr oldest_lsn = InvalidXLogRecPtr; - TimeLineID oldest_tli = 0; - bool keep_next_backup = true; /* Do not delete first full backup */ - bool backup_deleted = false; /* At least one backup was deleted */ + parray *backup_list = NULL; + parray *to_keep_list = parray_new(); + parray *to_purge_list = parray_new(); + + bool retention_is_set = false; /* At least one retention policy is set */ + bool backup_list_is_empty = false; + + backup_deleted = false; + backup_merged = false; - if (delete_expired) + /* Get a complete list of backups. */ + backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); + + if (parray_num(backup_list) == 0) + backup_list_is_empty = true; + + if (delete_expired || merge_expired) { - if (retention_redundancy > 0) - elog(LOG, "REDUNDANCY=%u", retention_redundancy); - if (retention_window > 0) - elog(LOG, "WINDOW=%u", retention_window); + if (instance_config.retention_redundancy > 0) + elog(LOG, "REDUNDANCY=%u", instance_config.retention_redundancy); + if (instance_config.retention_window > 0) + elog(LOG, "WINDOW=%u", instance_config.retention_window); - if (retention_redundancy == 0 - && retention_window == 0) + if (instance_config.retention_redundancy == 0 && + instance_config.retention_window == 0) { + /* Retention is disabled but we still can cleanup wal */ elog(WARNING, "Retention policy is not set"); if (!delete_wal) - return 0; + return; } + else + /* At least one retention policy is active */ + retention_is_set = true; } - /* Get exclusive lock of backup catalog */ - catalog_lock(); + if (retention_is_set && backup_list_is_empty) + elog(WARNING, "Backup list is empty, retention purge and merge are problematic"); - /* Get a complete list of backups. */ - backup_list = catalog_get_backup_list(INVALID_BACKUP_ID); - if (parray_num(backup_list) == 0) - { - elog(INFO, "backup list is empty, purging won't be executed"); - return 0; - } + /* Populate purge and keep lists, and show retention state messages */ + if (retention_is_set && !backup_list_is_empty) + do_retention_internal(backup_list, to_keep_list, to_purge_list); + + if (merge_expired && !dry_run && !backup_list_is_empty) + do_retention_merge(backup_list, to_keep_list, to_purge_list); + + if (delete_expired && !dry_run && !backup_list_is_empty) + do_retention_purge(to_keep_list, to_purge_list); - /* Find target backups to be deleted */ - if (delete_expired && - (retention_redundancy > 0 || retention_window > 0)) + /* TODO: some sort of dry run for delete_wal */ + if (delete_wal) + do_retention_wal(dry_run); + + /* TODO: consider dry-run flag */ + + if (!backup_merged) + elog(INFO, "There are no backups to merge by retention policy"); + + if (backup_deleted) + elog(INFO, "Purging finished"); + else + elog(INFO, "There are no backups to delete by retention policy"); + + if (!wal_deleted) + elog(INFO, "There is no WAL to purge by retention policy"); + + /* Cleanup */ + parray_walk(backup_list, pgBackupFree); + parray_free(backup_list); + parray_free(to_keep_list); + parray_free(to_purge_list); +} + +/* Evaluate every backup by retention policies and populate purge and keep lists. 
+ * Also for every backup print its status ('Active' or 'Expired') according + * to active retention policies. + */ +static void +do_retention_internal(parray *backup_list, parray *to_keep_list, parray *to_purge_list) +{ + int i; + + parray *redundancy_full_backup_list = NULL; + + /* For retention calculation */ + uint32 n_full_backups = 0; + int cur_full_backup_num = 0; + time_t days_threshold = 0; + + /* For fancy reporting */ + uint32 actual_window = 0; + + /* Calculate n_full_backups and days_threshold */ + if (instance_config.retention_redundancy > 0) { - backup_num = 0; for (i = 0; i < parray_num(backup_list); i++) { pgBackup *backup = (pgBackup *) parray_get(backup_list, i); - uint32 backup_num_evaluate = backup_num; - /* Consider only validated and correct backups */ - if (backup->status != BACKUP_STATUS_OK) - continue; - /* - * When a valid full backup was found, we can delete the - * backup that is older than it using the number of generations. + /* Consider only valid FULL backups for Redundancy */ + if (instance_config.retention_redundancy > 0 && + backup->backup_mode == BACKUP_MODE_FULL && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) + { + n_full_backups++; + + /* Add every FULL backup that satisfy Redundancy policy to separate list */ + if (n_full_backups <= instance_config.retention_redundancy) + { + if (!redundancy_full_backup_list) + redundancy_full_backup_list = parray_new(); + + parray_append(redundancy_full_backup_list, backup); + } + } + } + /* Sort list of full backups to keep */ + if (redundancy_full_backup_list) + parray_qsort(redundancy_full_backup_list, pgBackupCompareIdDesc); + } + + if (instance_config.retention_window > 0) + { + days_threshold = current_time - + (instance_config.retention_window * 60 * 60 * 24); + } + + elog(INFO, "Evaluate backups by retention"); + for (i = (int) parray_num(backup_list) - 1; i >= 0; i--) + { + + bool redundancy_keep = false; + time_t backup_time = 0; + pgBackup *backup = (pgBackup *) parray_get(backup_list, (size_t) i); + + /* check if backup`s FULL ancestor is in redundancy list */ + if (redundancy_full_backup_list) + { + pgBackup *full_backup = find_parent_full_backup(backup); + + if (full_backup && parray_bsearch(redundancy_full_backup_list, + full_backup, + pgBackupCompareIdDesc)) + redundancy_keep = true; + } + + /* Remember the serial number of latest valid FULL backup */ + if (backup->backup_mode == BACKUP_MODE_FULL && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) + { + cur_full_backup_num++; + } + + /* Invalid and running backups most likely to have recovery_time == 0, + * so in this case use start_time instead. 
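The window check used above can be condensed as follows; the helper name in_retention_window is invented for this sketch, while the fallback to start_time for running or invalid backups mirrors the patch.

/*
 * Minimal sketch (not patch code): a backup is inside the retention window
 * of `window_days` when its recovery time (or start time, if recovery_time
 * is 0) is not older than the threshold.
 */
static bool
in_retention_window(pgBackup *backup, time_t now, uint32 window_days)
{
	time_t	threshold   = now - (time_t) window_days * 60 * 60 * 24;
	time_t	backup_time = backup->recovery_time ? backup->recovery_time
												: backup->start_time;

	return backup_time >= threshold;
}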
+ */ + if (backup->recovery_time) + backup_time = backup->recovery_time; + else + backup_time = backup->start_time; + + /* Check if backup in needed by retention policy */ + if ((days_threshold == 0 || (days_threshold > backup_time)) && + (instance_config.retention_redundancy == 0 || !redundancy_keep)) + { + /* This backup is not guarded by retention + * + * Redundancy = 1 + * FULL CORRUPT in retention (not count toward redundancy limit) + * FULL in retention + * ------retention redundancy ------- + * PAGE3 in retention + * ------retention window ----------- + * PAGE2 out of retention + * PAGE1 out of retention + * FULL out of retention <- We are here + * FULL CORRUPT out of retention */ - if (backup->backup_mode == BACKUP_MODE_FULL) - backup_num++; - /* Evaluate retention_redundancy if this backup is eligible for removal */ - if (keep_next_backup || - retention_redundancy >= backup_num_evaluate + 1 || - (retention_window > 0 && backup->recovery_time >= days_threshold)) + /* Save backup from purge if backup is pinned and + * expire date is not yet due. + */ + if ((backup->expire_time > 0) && + (backup->expire_time > current_time)) { - /* Save LSN and Timeline to remove unnecessary WAL segments */ - oldest_lsn = backup->start_lsn; - oldest_tli = backup->tli; - - /* Save parent backup of this incremental backup */ - if (backup->backup_mode != BACKUP_MODE_FULL) - keep_next_backup = true; - /* - * Previous incremental backup was kept or this is first backup - * so do not delete this backup. - */ - else - keep_next_backup = false; + char expire_timestamp[100]; + time2iso(expire_timestamp, lengthof(expire_timestamp), backup->expire_time); + elog(LOG, "Backup %s is pinned until '%s', retain", + base36enc(backup->start_time), expire_timestamp); continue; } - /* Delete backup and update status to DELETED */ - pgBackupDeleteFiles(backup); - backup_deleted = true; + /* Add backup to purge_list */ + elog(VERBOSE, "Mark backup %s for purge.", base36enc(backup->start_time)); + parray_append(to_purge_list, backup); + continue; } } - /* - * If oldest_lsn and oldest_tli weren`t set because previous step was skipped - * then set them now if we are going to purge WAL + /* sort keep_list and purge list */ + parray_qsort(to_keep_list, pgBackupCompareIdDesc); + parray_qsort(to_purge_list, pgBackupCompareIdDesc); + + /* FULL + * PAGE + * PAGE <- Only such backups must go into keep list + ---------retention window ---- + * PAGE + * FULL + * PAGE + * FULL */ - if (delete_wal && (XLogRecPtrIsInvalid(oldest_lsn))) + + for (i = 0; i < parray_num(backup_list); i++) { - pgBackup *backup = (pgBackup *) parray_get(backup_list, parray_num(backup_list) - 1); - oldest_lsn = backup->start_lsn; - oldest_tli = backup->tli; + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* Do not keep invalid backups by retention + * Turns out it was not a very good idea - [Issue #114] + */ + //if (backup->status != BACKUP_STATUS_OK && + // backup->status != BACKUP_STATUS_DONE) + // continue; + + /* only incremental backups should be in keep list */ + if (backup->backup_mode == BACKUP_MODE_FULL) + continue; + + /* orphan backup cannot be in keep list */ + if (!backup->parent_backup_link) + continue; + + /* skip if backup already in purge list */ + if (parray_bsearch(to_purge_list, backup, pgBackupCompareIdDesc)) + continue; + + /* if parent in purge_list, add backup to keep list */ + if (parray_bsearch(to_purge_list, + backup->parent_backup_link, + pgBackupCompareIdDesc)) + { + /* make keep list a bit more compact */ + 
parray_append(to_keep_list, backup); + continue; + } } - /* Be paranoid */ - if (XLogRecPtrIsInvalid(oldest_lsn)) - elog(ERROR, "Not going to purge WAL because LSN is invalid"); + /* Message about retention state of backups + * TODO: message is ugly, rewrite it to something like show table in stdout. + */ - /* Purge WAL files */ - if (delete_wal) + cur_full_backup_num = 1; + for (i = 0; i < parray_num(backup_list); i++) { - delete_walfiles(oldest_lsn, oldest_tli); + char *action = "Active"; + uint32 pinning_window = 0; + + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + if (parray_bsearch(to_purge_list, backup, pgBackupCompareIdDesc)) + action = "Expired"; + + if (backup->recovery_time == 0) + actual_window = 0; + else + actual_window = (current_time - backup->recovery_time)/(3600 * 24); + + /* For pinned backups show expire date */ + if (backup->expire_time > 0 && backup->expire_time > backup->recovery_time) + pinning_window = (backup->expire_time - backup->recovery_time)/(3600 * 24); + + /* TODO: add ancestor(chain full backup) ID */ + elog(INFO, "Backup %s, mode: %s, status: %s. Redundancy: %i/%i, Time Window: %ud/%ud. %s", + base36enc(backup->start_time), + pgBackupGetBackupMode(backup), + status2str(backup->status), + cur_full_backup_num, + instance_config.retention_redundancy, + actual_window, + pinning_window ? pinning_window : instance_config.retention_window, + action); + + if (backup->backup_mode == BACKUP_MODE_FULL) + cur_full_backup_num++; } +} - /* Cleanup */ - parray_walk(backup_list, pgBackupFree); - parray_free(backup_list); +/* Merge partially expired incremental chains */ +static void +do_retention_merge(parray *backup_list, parray *to_keep_list, parray *to_purge_list) +{ + int i; + int j; + + /* IMPORTANT: we can merge to only those FULL backup, that is NOT + * guarded by retention and final target of such merge must be + * an incremental backup that is guarded by retention !!! + * + * PAGE4 E + * PAGE3 D + --------retention window --- + * PAGE2 C + * PAGE1 B + * FULL A + * + * after retention merge: + * PAGE4 E + * FULL D + */ - if (backup_deleted) - elog(INFO, "Purging finished"); - else - elog(INFO, "Nothing to delete by retention policy"); + /* Merging happens here */ + for (i = 0; i < parray_num(to_keep_list); i++) + { + char *keep_backup_id = NULL; + pgBackup *full_backup = NULL; + parray *merge_list = NULL; - return 0; + pgBackup *keep_backup = (pgBackup *) parray_get(to_keep_list, i); + + /* keep list may shrink during merge */ + if (!keep_backup) + continue; + + elog(INFO, "Consider backup %s for merge", base36enc(keep_backup->start_time)); + + /* Got valid incremental backup, find its FULL ancestor */ + full_backup = find_parent_full_backup(keep_backup); + + /* Failed to find parent */ + if (!full_backup) + { + elog(WARNING, "Failed to find FULL parent for %s", base36enc(keep_backup->start_time)); + continue; + } + + /* Check that ancestor is in purge_list */ + if (!parray_bsearch(to_purge_list, + full_backup, + pgBackupCompareIdDesc)) + { + elog(WARNING, "Skip backup %s for merging, " + "because his FULL parent is not marked for purge", base36enc(keep_backup->start_time)); + continue; + } + + /* FULL backup in purge list, thanks to compacting of keep_list current backup is + * final target for merge, but there could be intermediate incremental + * backups from purge_list. 
+ */ + + keep_backup_id = base36enc_dup(keep_backup->start_time); + elog(INFO, "Merge incremental chain between full backup %s and backup %s", + base36enc(full_backup->start_time), keep_backup_id); + pg_free(keep_backup_id); + + merge_list = parray_new(); + + /* Form up a merge list */ + while (keep_backup->parent_backup_link) + { + parray_append(merge_list, keep_backup); + keep_backup = keep_backup->parent_backup_link; + } + + /* sanity */ + if (!merge_list) + continue; + + /* sanity */ + if (parray_num(merge_list) == 0) + { + parray_free(merge_list); + continue; + } + + /* In the end add FULL backup for easy locking */ + parray_append(merge_list, full_backup); + + /* Remove FULL backup from purge list */ + parray_rm(to_purge_list, full_backup, pgBackupCompareId); + + /* Lock merge chain */ + catalog_lock_backup_list(merge_list, parray_num(merge_list) - 1, 0, true); + + /* Consider this extreme case */ + // PAGEa1 PAGEb1 both valid + // \ / + // FULL + + /* Check that FULL backup do not has multiple descendants + * full_backup always point to current full_backup after merge + */ +// if (is_prolific(backup_list, full_backup)) +// { +// elog(WARNING, "Backup %s has multiple valid descendants. " +// "Automatic merge is not possible.", base36enc(full_backup->start_time)); +// } + + /* Merge list example: + * 0 PAGE3 + * 1 PAGE2 + * 2 PAGE1 + * 3 FULL + * + * Merge incremental chain from PAGE3 into FULL. + */ + + keep_backup = parray_get(merge_list, 0); + merge_chain(merge_list, full_backup, keep_backup); + backup_merged = true; + + for (j = parray_num(merge_list) - 2; j >= 0; j--) + { + pgBackup *tmp_backup = (pgBackup *) parray_get(merge_list, j); + + /* Try to remove merged incremental backup from both keep and purge lists */ + parray_rm(to_purge_list, tmp_backup, pgBackupCompareId); + parray_set(to_keep_list, i, NULL); + } + + pgBackupValidate(full_backup, NULL); + if (full_backup->status == BACKUP_STATUS_CORRUPT) + elog(ERROR, "Merging of backup %s failed", base36enc(full_backup->start_time)); + + /* Cleanup */ + parray_free(merge_list); + } + + elog(INFO, "Retention merging finished"); + +} + +/* Purge expired backups */ +static void +do_retention_purge(parray *to_keep_list, parray *to_purge_list) +{ + int i; + int j; + + /* Remove backups by retention policy. Retention policy is configured by + * retention_redundancy and retention_window + * Remove only backups, that do not have children guarded by retention + * + * TODO: We do not consider the situation if child is marked for purge + * but parent isn`t. Maybe something bad happened with time on server? + */ + + for (j = 0; j < parray_num(to_purge_list); j++) + { + bool purge = true; + + pgBackup *delete_backup = (pgBackup *) parray_get(to_purge_list, j); + + elog(LOG, "Consider backup %s for purge", + base36enc(delete_backup->start_time)); + + /* Evaluate marked for delete backup against every backup in keep list. + * If marked for delete backup is recognized as parent of one of those, + * then this backup should not be deleted. 
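As a concrete illustration of that rule (backup names are made up, and the merge step above is ignored for simplicity): given a chain FULL_A <- PAGE_B <- PAGE_C where FULL_A and PAGE_B landed in to_purge_list but PAGE_C is in to_keep_list, is_parent() reports both FULL_A and PAGE_B as ancestors of PAGE_C, so the loop below clears the purge flag for each of them:

    /* sketch of the protection check, mirroring the loop below */
    if (is_parent(delete_backup->start_time, keep_backup, true))
        purge = false;    /* expired, but a retained descendant depends on it */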
+ */ + for (i = 0; i < parray_num(to_keep_list); i++) + { + char *keeped_backup_id; + + pgBackup *keep_backup = (pgBackup *) parray_get(to_keep_list, i); + + /* item could have been nullified in merge */ + if (!keep_backup) + continue; + + /* Full backup cannot be a descendant */ + if (keep_backup->backup_mode == BACKUP_MODE_FULL) + continue; + + keeped_backup_id = base36enc_dup(keep_backup->start_time); + + elog(LOG, "Check if backup %s is parent of backup %s", + base36enc(delete_backup->start_time), keeped_backup_id); + + if (is_parent(delete_backup->start_time, keep_backup, true)) + { + + /* We must not delete this backup, evict it from purge list */ + elog(LOG, "Retain backup %s because his " + "descendant %s is guarded by retention", + base36enc(delete_backup->start_time), keeped_backup_id); + + purge = false; + pg_free(keeped_backup_id); + break; + } + pg_free(keeped_backup_id); + } + + /* Retain backup */ + if (!purge) + continue; + + /* Actual purge */ + if (!lock_backup(delete_backup, false)) + { + /* If the backup still is used, do not interrupt and go to the next */ + elog(WARNING, "Cannot lock backup %s directory, skip purging", + base36enc(delete_backup->start_time)); + continue; + } + + /* Delete backup and update status to DELETED */ + delete_backup_files(delete_backup); + backup_deleted = true; + + } +} + +/* + * Purge WAL + * Iterate over timelines + * Look for WAL segment not reachable from existing backups + * and delete them. + */ +static void +do_retention_wal(bool dry_run) +{ + parray *tli_list; + int i; + + tli_list = catalog_get_timelines(&instance_config); + + for (i = 0; i < parray_num(tli_list); i++) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i); + + /* + * Empty timeline (only mentioned in timeline history file) + * has nothing to cleanup. + */ + if (tlinfo->n_xlog_files == 0 && parray_num(tlinfo->xlog_filelist) == 0) + continue; + + /* + * If closest backup exists, then timeline is reachable from + * at least one backup and no file should be removed. + * Unless wal-depth is enabled. + */ + if ((tlinfo->closest_backup) && instance_config.wal_depth <= 0) + continue; + + /* WAL retention keeps this timeline from purge */ + if (instance_config.wal_depth >= 0 && tlinfo->anchor_tli > 0 && + tlinfo->anchor_tli != tlinfo->tli) + continue; + + /* + * Purge all WAL segments before START LSN of oldest backup. + * If timeline doesn't have a backup, then whole timeline + * can be safely purged. + * Note, that oldest_backup is not necessarily valid here, + * but still we keep wal for it. + * If wal-depth is enabled then use anchor_lsn instead + * of oldest_backup. + */ + if (tlinfo->oldest_backup) + { + if (instance_config.wal_depth >= 0 && !(XLogRecPtrIsInvalid(tlinfo->anchor_lsn))) + { + delete_walfiles_in_tli(tlinfo->anchor_lsn, + tlinfo, instance_config.xlog_seg_size, dry_run); + } + else + { + delete_walfiles_in_tli(tlinfo->oldest_backup->start_lsn, + tlinfo, instance_config.xlog_seg_size, dry_run); + } + } + else + { + if (instance_config.wal_depth >= 0 && !(XLogRecPtrIsInvalid(tlinfo->anchor_lsn))) + delete_walfiles_in_tli(tlinfo->anchor_lsn, + tlinfo, instance_config.xlog_seg_size, dry_run); + else + delete_walfiles_in_tli(InvalidXLogRecPtr, + tlinfo, instance_config.xlog_seg_size, dry_run); + } + } } /* * Delete backup files of the backup and update the status of the backup to * BACKUP_STATUS_DELETED. 
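Taken together, the do_retention_* helpers above are easiest to read as one pipeline. A hedged sketch of how the top-level retention entry point (not part of this hunk) would be expected to chain them:

    parray *to_keep_list  = parray_new();
    parray *to_purge_list = parray_new();

    do_retention_internal(backup_list, to_keep_list, to_purge_list); /* classify backups   */
    do_retention_merge(backup_list, to_keep_list, to_purge_list);    /* compact incr chains */
    do_retention_purge(to_keep_list, to_purge_list);                 /* drop expired ones   */
    do_retention_wal(dry_run);                                       /* trim WAL archive    */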
*/ -static int -pgBackupDeleteFiles(pgBackup *backup) +void +delete_backup_files(pgBackup *backup) { size_t i; - char path[MAXPGPATH]; char timestamp[100]; - parray *files; + parray *files; + size_t num_files; + char full_path[MAXPGPATH]; /* * If the backup was deleted already, there is nothing to do. */ if (backup->status == BACKUP_STATUS_DELETED) - return 0; + { + elog(WARNING, "Backup %s already deleted", + base36enc(backup->start_time)); + return; + } time2iso(timestamp, lengthof(timestamp), backup->recovery_time); - elog(INFO, "delete: %s %s", + elog(INFO, "Delete: %s %s", base36enc(backup->start_time), timestamp); /* * Update STATUS to BACKUP_STATUS_DELETING in preparation for the case which * the error occurs before deleting all backup files. */ - backup->status = BACKUP_STATUS_DELETING; - pgBackupWriteBackupControlFile(backup); + write_backup_status(backup, BACKUP_STATUS_DELETING, instance_name, false); /* list files to be deleted */ files = parray_new(); - pgBackupGetPath(backup, path, lengthof(path), NULL); - dir_list_file(files, path, false, true, true); + dir_list_file(files, backup->root_dir, false, false, true, false, false, 0, FIO_BACKUP_HOST); /* delete leaf node first */ - parray_qsort(files, pgFileComparePathDesc); - for (i = 0; i < parray_num(files); i++) + parray_qsort(files, pgFileCompareRelPathWithExternalDesc); + num_files = parray_num(files); + for (i = 0; i < num_files; i++) { pgFile *file = (pgFile *) parray_get(files, i); - /* print progress */ - elog(VERBOSE, "delete file(%zd/%lu) \"%s\"", i + 1, - (unsigned long) parray_num(files), file->path); + join_path_components(full_path, backup->root_dir, file->rel_path); - if (remove(file->path)) - { - elog(WARNING, "can't remove \"%s\": %s", file->path, - strerror(errno)); - parray_walk(files, pgFileFree); - parray_free(files); + if (interrupted) + elog(ERROR, "interrupted during delete backup"); - return 1; - } + if (progress) + elog(INFO, "Progress: (%zd/%zd). Delete file \"%s\"", + i + 1, num_files, full_path); + + pgFileDelete(file->mode, full_path); } parray_walk(files, pgFileFree); parray_free(files); backup->status = BACKUP_STATUS_DELETED; - return 0; + return; } /* - * Deletes WAL segments up to oldest_lsn or all WAL segments (if all backups - * was deleted and so oldest_lsn is invalid). + * Purge WAL archive. One timeline at a time. + * If 'keep_lsn' is InvalidXLogRecPtr, then whole timeline can be purged + * If 'keep_lsn' is valid LSN, then every lesser segment can be purged. + * If 'dry_run' is set, then don`t actually delete anything. + * + * Case 1: + * archive is not empty, 'keep_lsn' is valid and we can delete something. + * Case 2: + * archive is not empty, 'keep_lsn' is valid and prevening us from deleting anything. + * Case 3: + * archive is not empty, 'keep_lsn' is invalid, drop all WAL files in archive, + * belonging to the timeline. + * Case 4: + * archive is empty, 'keep_lsn' is valid, assume corruption of WAL archive. + * Case 5: + * archive is empty, 'keep_lsn' is invalid, drop backup history files + * and partial WAL segments in archive. * - * oldest_lsn - if valid, function deletes WAL segments, which contain lsn - * older than oldest_lsn. If it is invalid function deletes all WAL segments. - * oldest_tli - is used to construct oldest WAL segment in addition to - * oldest_lsn. + * Q: Maybe we should stop treating partial WAL segments as second-class citizens? 
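A worked example of the keep_lsn boundary used below, with made-up values and the default 16MB segment size:

    /* keep_lsn = 0/4000000 on timeline 1, xlog_seg_size = 16 * 1024 * 1024 */
    XLogSegNo OldestToKeepSegNo;
    GetXLogSegNo((XLogRecPtr) 0x4000000, OldestToKeepSegNo, 16 * 1024 * 1024);

    /* OldestToKeepSegNo == 4: segments 000000010000000000000001 through
     * 000000010000000000000003 become candidates for removal, while
     * 000000010000000000000004 and later are kept */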
*/ static void -delete_walfiles(XLogRecPtr oldest_lsn, TimeLineID oldest_tli) +delete_walfiles_in_tli(XLogRecPtr keep_lsn, timelineInfo *tlinfo, + uint32 xlog_seg_size, bool dry_run) { - XLogSegNo targetSegNo; - char oldestSegmentNeeded[MAXFNAMELEN]; - DIR *arcdir; - struct dirent *arcde; - char wal_file[MAXPGPATH]; - char max_wal_file[MAXPGPATH]; - char min_wal_file[MAXPGPATH]; - int rc; + XLogSegNo FirstToDeleteSegNo; + XLogSegNo OldestToKeepSegNo = 0; + char first_to_del_str[MAXFNAMELEN]; + char oldest_to_keep_str[MAXFNAMELEN]; + int i; + size_t wal_size_logical = 0; + size_t wal_size_actual = 0; + char wal_pretty_size[20]; + bool purge_all = false; - max_wal_file[0] = '\0'; - min_wal_file[0] = '\0'; - if (!XLogRecPtrIsInvalid(oldest_lsn)) + /* Timeline is completely empty */ + if (parray_num(tlinfo->xlog_filelist) == 0) { - XLByteToSeg(oldest_lsn, targetSegNo); - XLogFileName(oldestSegmentNeeded, oldest_tli, targetSegNo); + elog(INFO, "Timeline %i is empty, nothing to remove", tlinfo->tli); + return; + } - elog(LOG, "removing WAL segments older than %s", oldestSegmentNeeded); + if (XLogRecPtrIsInvalid(keep_lsn)) + { + /* Drop all files in timeline */ + elog(INFO, "On timeline %i all files %s be removed", + tlinfo->tli, dry_run?"can":"will"); + FirstToDeleteSegNo = tlinfo->begin_segno; + OldestToKeepSegNo = tlinfo->end_segno; + purge_all = true; } else - elog(LOG, "removing all WAL segments"); + { + /* Drop all segments between begin_segno and segment with keep_lsn (excluding) */ + FirstToDeleteSegNo = tlinfo->begin_segno; + GetXLogSegNo(keep_lsn, OldestToKeepSegNo, xlog_seg_size); + } - /* - * Now it is time to do the actual work and to remove all the segments - * not needed anymore. - */ - if ((arcdir = opendir(arclog_path)) != NULL) + if (OldestToKeepSegNo > 0 && OldestToKeepSegNo > FirstToDeleteSegNo) { - while (errno = 0, (arcde = readdir(arcdir)) != NULL) + /* translate segno number into human readable format */ + GetXLogFileName(first_to_del_str, tlinfo->tli, FirstToDeleteSegNo, xlog_seg_size); + GetXLogFileName(oldest_to_keep_str, tlinfo->tli, OldestToKeepSegNo, xlog_seg_size); + + elog(INFO, "On timeline %i WAL segments between %s and %s %s be removed", + tlinfo->tli, first_to_del_str, + oldest_to_keep_str, dry_run?"can":"will"); + } + + /* sanity */ + if (OldestToKeepSegNo > FirstToDeleteSegNo) + { + wal_size_logical = (OldestToKeepSegNo - FirstToDeleteSegNo) * xlog_seg_size; + + /* In case of 'purge all' scenario OldestToKeepSegNo will be deleted too */ + if (purge_all) + wal_size_logical += xlog_seg_size; + } + else if (OldestToKeepSegNo < FirstToDeleteSegNo) + { + /* It is actually possible for OldestToKeepSegNo to be less than FirstToDeleteSegNo + * in case of : + * 1. WAL archive corruption. + * 2. There is no actual WAL archive to speak of and + * 'keep_lsn' is coming from STREAM backup. + */ + + if (FirstToDeleteSegNo > 0 && OldestToKeepSegNo > 0) { - /* - * We ignore the timeline part of the WAL segment identifiers in - * deciding whether a segment is still needed. This ensures that - * we won't prematurely remove a segment from a parent timeline. - * We could probably be a little more proactive about removing - * segments of non-parent timelines, but that would be a whole lot - * more complicated. - * - * We use the alphanumeric sorting property of the filenames to - * decide which ones are earlier than the exclusiveCleanupFileName - * file. Note that this means files are not removed in the order - * they were originally written, in case this worries you. 
- * - * We also should not forget that WAL segment can be compressed. - */ - if (IsXLogFileName(arcde->d_name) || - IsPartialXLogFileName(arcde->d_name) || - IsBackupHistoryFileName(arcde->d_name) || - IsCompressedXLogFileName(arcde->d_name)) + GetXLogFileName(first_to_del_str, tlinfo->tli, FirstToDeleteSegNo, xlog_seg_size); + GetXLogFileName(oldest_to_keep_str, tlinfo->tli, OldestToKeepSegNo, xlog_seg_size); + + elog(LOG, "On timeline %i first segment %s is greater than oldest segment to keep %s", + tlinfo->tli, first_to_del_str, oldest_to_keep_str); + } + } + else if (OldestToKeepSegNo == FirstToDeleteSegNo && !purge_all) + { + /* 'Nothing to delete' scenario because of 'keep_lsn' + * with possible exception of partial and backup history files. + */ + elog(INFO, "Nothing to remove on timeline %i", tlinfo->tli); + } + + /* Report the logical size to delete */ + if (wal_size_logical > 0) + { + pretty_size(wal_size_logical, wal_pretty_size, lengthof(wal_pretty_size)); + elog(INFO, "Logical WAL size to remove on timeline %i : %s", + tlinfo->tli, wal_pretty_size); + } + + /* Calculate the actual size to delete */ + for (i = 0; i < parray_num(tlinfo->xlog_filelist); i++) + { + xlogFile *wal_file = (xlogFile *) parray_get(tlinfo->xlog_filelist, i); + + if (purge_all || wal_file->segno < OldestToKeepSegNo) + wal_size_actual += wal_file->file.size; + } + + /* Report the actual size to delete */ + if (wal_size_actual > 0) + { + pretty_size(wal_size_actual, wal_pretty_size, lengthof(wal_pretty_size)); + elog(INFO, "Resident WAL size to free on timeline %i : %s", + tlinfo->tli, wal_pretty_size); + } + + if (dry_run) + return; + + for (i = 0; i < parray_num(tlinfo->xlog_filelist); i++) + { + xlogFile *wal_file = (xlogFile *) parray_get(tlinfo->xlog_filelist, i); + + if (interrupted) + elog(ERROR, "interrupted during WAL archive purge"); + + /* Any segment equal or greater than EndSegNo must be kept + * unless it`s a 'purge all' scenario. + */ + if (purge_all || wal_file->segno < OldestToKeepSegNo) + { + char wal_fullpath[MAXPGPATH]; + + join_path_components(wal_fullpath, instance_config.arclog_path, wal_file->file.name); + + /* save segment from purging */ + if (instance_config.wal_depth >= 0 && wal_file->keep) { - if (XLogRecPtrIsInvalid(oldest_lsn) || - strncmp(arcde->d_name + 8, oldestSegmentNeeded + 8, 16) < 0) - { - /* - * Use the original file name again now, including any - * extension that might have been chopped off before testing - * the sequence. 
- */ - snprintf(wal_file, MAXPGPATH, "%s/%s", - arclog_path, arcde->d_name); - - rc = unlink(wal_file); - if (rc != 0) - { - elog(WARNING, "could not remove file \"%s\": %s", - wal_file, strerror(errno)); - break; - } - elog(LOG, "removed WAL segment \"%s\"", wal_file); - - if (max_wal_file[0] == '\0' || - strcmp(max_wal_file + 8, arcde->d_name + 8) < 0) - strcpy(max_wal_file, arcde->d_name); - - if (min_wal_file[0] == '\0' || - strcmp(min_wal_file + 8, arcde->d_name + 8) > 0) - strcpy(min_wal_file, arcde->d_name); - } + elog(VERBOSE, "Retain WAL segment \"%s\"", wal_fullpath); + continue; } - } - if (min_wal_file[0] != '\0') - elog(INFO, "removed min WAL segment \"%s\"", min_wal_file); - if (max_wal_file[0] != '\0') - elog(INFO, "removed max WAL segment \"%s\"", max_wal_file); + /* unlink segment */ + if (fio_unlink(wal_fullpath, FIO_BACKUP_HOST) < 0) + { + /* Missing file is not considered as error condition */ + if (errno != ENOENT) + elog(ERROR, "Could not remove file \"%s\": %s", + wal_fullpath, strerror(errno)); + } + else + { + if (wal_file->type == SEGMENT) + elog(VERBOSE, "Removed WAL segment \"%s\"", wal_fullpath); + else if (wal_file->type == TEMP_SEGMENT) + elog(VERBOSE, "Removed temp WAL segment \"%s\"", wal_fullpath); + else if (wal_file->type == PARTIAL_SEGMENT) + elog(VERBOSE, "Removed partial WAL segment \"%s\"", wal_fullpath); + else if (wal_file->type == BACKUP_HISTORY_FILE) + elog(VERBOSE, "Removed backup history file \"%s\"", wal_fullpath); + } - if (errno) - elog(WARNING, "could not read archive location \"%s\": %s", - arclog_path, strerror(errno)); - if (closedir(arcdir)) - elog(WARNING, "could not close archive location \"%s\": %s", - arclog_path, strerror(errno)); + wal_deleted = true; + } } - else - elog(WARNING, "could not open archive location \"%s\": %s", - arclog_path, strerror(errno)); } @@ -418,17 +964,20 @@ delete_walfiles(XLogRecPtr oldest_lsn, TimeLineID oldest_tli) int do_delete_instance(void) { - parray *backup_list; - int i; + parray *backup_list; + int i; char instance_config_path[MAXPGPATH]; + /* Delete all backups. */ - backup_list = catalog_get_backup_list(INVALID_BACKUP_ID); + backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); + + catalog_lock_backup_list(backup_list, 0, parray_num(backup_list) - 1, true); for (i = 0; i < parray_num(backup_list); i++) { pgBackup *backup = (pgBackup *) parray_get(backup_list, i); - pgBackupDeleteFiles(backup); + delete_backup_files(backup); } /* Cleanup */ @@ -436,24 +985,129 @@ do_delete_instance(void) parray_free(backup_list); /* Delete all wal files. 
*/ - delete_walfiles(InvalidXLogRecPtr, 0); + pgut_rmtree(arclog_path, false, true); /* Delete backup instance config file */ join_path_components(instance_config_path, backup_instance_path, BACKUP_CATALOG_CONF_FILE); if (remove(instance_config_path)) { - elog(ERROR, "can't remove \"%s\": %s", instance_config_path, + elog(ERROR, "Can't remove \"%s\": %s", instance_config_path, strerror(errno)); } /* Delete instance root directories */ if (rmdir(backup_instance_path) != 0) - elog(ERROR, "can't remove \"%s\": %s", backup_instance_path, + elog(ERROR, "Can't remove \"%s\": %s", backup_instance_path, strerror(errno)); + if (rmdir(arclog_path) != 0) - elog(ERROR, "can't remove \"%s\": %s", backup_instance_path, + elog(ERROR, "Can't remove \"%s\": %s", arclog_path, strerror(errno)); elog(INFO, "Instance '%s' successfully deleted", instance_name); return 0; } + +/* Delete all backups of given status in instance */ +void +do_delete_status(InstanceConfig *instance_config, const char *status) +{ + int i; + parray *backup_list, *delete_list; + const char *pretty_status; + int n_deleted = 0, n_found = 0; + size_t size_to_delete = 0; + char size_to_delete_pretty[20]; + pgBackup *backup; + + BackupStatus status_for_delete = str2status(status); + delete_list = parray_new(); + + if (status_for_delete == BACKUP_STATUS_INVALID) + elog(ERROR, "Unknown value for '--status' option: '%s'", status); + + /* + * User may have provided status string in lower case, but + * we should print backup statuses consistently with show command, + * so convert it. + */ + pretty_status = status2str(status_for_delete); + + backup_list = catalog_get_backup_list(instance_config->name, INVALID_BACKUP_ID); + + if (parray_num(backup_list) == 0) + { + elog(WARNING, "Instance '%s' has no backups", instance_config->name); + return; + } + + if (dry_run) + elog(INFO, "Deleting all backups with status '%s' in dry run mode", pretty_status); + else + elog(INFO, "Deleting all backups with status '%s'", pretty_status); + + /* Selects backups with specified status and their children into delete_list array. */ + for (i = 0; i < parray_num(backup_list); i++) + { + backup = (pgBackup *) parray_get(backup_list, i); + + if (backup->status == status_for_delete) + { + n_found++; + + /* incremental backup can be already in delete_list due to append_children() */ + if (parray_contains(delete_list, backup)) + continue; + parray_append(delete_list, backup); + + append_children(backup_list, backup, delete_list); + } + } + + parray_qsort(delete_list, pgBackupCompareIdDesc); + + /* delete and calculate free size from delete_list */ + for (i = 0; i < parray_num(delete_list); i++) + { + backup = (pgBackup *)parray_get(delete_list, i); + + elog(INFO, "Backup %s with status %s %s be deleted", + base36enc(backup->start_time), status2str(backup->status), dry_run ? "can" : "will"); + + size_to_delete += backup->data_bytes; + if (backup->stream) + size_to_delete += backup->wal_bytes; + + if (!dry_run && lock_backup(backup, false)) + delete_backup_files(backup); + + n_deleted++; + } + + /* Inform about data size to free */ + if (size_to_delete >= 0) + { + pretty_size(size_to_delete, size_to_delete_pretty, lengthof(size_to_delete_pretty)); + elog(INFO, "Resident data size to free by delete of %i backups: %s", + n_deleted, size_to_delete_pretty); + } + + /* delete selected backups */ + if (!dry_run && n_deleted > 0) + elog(INFO, "Successfully deleted %i %s from instance '%s'", + n_deleted, n_deleted == 1 ? 
"backup" : "backups", + instance_config->name); + + + if (n_found == 0) + elog(WARNING, "Instance '%s' has no backups with status '%s'", + instance_config->name, pretty_status); + + // we don`t do WAL purge here, because it is impossible to correctly handle + // dry-run case. + + /* Cleanup */ + parray_free(delete_list); + parray_walk(backup_list, pgBackupFree); + parray_free(backup_list); +} diff --git a/src/dir.c b/src/dir.c index a08bd9343..dfbb6e8c4 100644 --- a/src/dir.c +++ b/src/dir.c @@ -3,22 +3,25 @@ * dir.c: directory operation utility. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ #include "pg_probackup.h" +#include "utils/file.h" + + +#if PG_VERSION_NUM < 110000 +#include "catalog/catalog.h" +#endif +#include "catalog/pg_tablespace.h" #include #include -#include #include -#include -#include "catalog/catalog.h" -#include "catalog/pg_tablespace.h" -#include "datapagemap.h" +#include "utils/configuration.h" /* * The contents of these directories are removed or recreated during server @@ -76,6 +79,9 @@ static char *pgdata_exclude_files[] = "recovery.conf", "postmaster.pid", "postmaster.opts", + "probackup_recovery.conf", + "recovery.signal", + "standby.signal", NULL }; @@ -115,19 +121,20 @@ typedef struct TablespaceCreatedList TablespaceCreatedListCell *tail; } TablespaceCreatedList; -static int BlackListCompare(const void *str1, const void *str2); +static int pgCompareString(const void *str1, const void *str2); -static bool dir_check_file(const char *root, pgFile *file); -static void dir_list_file_internal(parray *files, const char *root, - pgFile *parent, bool exclude, - bool omit_symlink, parray *black_list); +static char dir_check_file(pgFile *file, bool backup_logs); -static void list_data_directories(parray *files, const char *path, bool is_root, - bool exclude); +static void dir_list_file_internal(parray *files, pgFile *parent, const char *parent_dir, + bool exclude, bool follow_symlink, bool backup_logs, + bool skip_hidden, int external_dir_num, fio_location location); +static void opt_path_map(ConfigOption *opt, const char *arg, + TablespaceList *list, const char *type); /* Tablespace mapping */ static TablespaceList tablespace_dirs = {NULL, NULL}; -static TablespaceCreatedList tablespace_created_dirs = {NULL, NULL}; +/* Extra directories mapping */ +static TablespaceList external_remap_list = {NULL, NULL}; /* * Create directory, also create parent directories if necessary. @@ -156,13 +163,14 @@ dir_create_dir(const char *dir, mode_t mode) } pgFile * -pgFileNew(const char *path, bool omit_symlink) +pgFileNew(const char *path, const char *rel_path, bool follow_symlink, + int external_dir_num, fio_location location) { struct stat st; pgFile *file; /* stat the file */ - if ((omit_symlink ? 
stat(path, &st) : lstat(path, &st)) == -1) + if (fio_stat(path, &st, follow_symlink, location) < 0) { /* file not found is not an error case */ if (errno == ENOENT) @@ -171,59 +179,44 @@ pgFileNew(const char *path, bool omit_symlink) strerror(errno)); } - file = pgFileInit(path); + file = pgFileInit(rel_path); file->size = st.st_size; file->mode = st.st_mode; + file->mtime = st.st_mtime; + file->external_dir_num = external_dir_num; return file; } pgFile * -pgFileInit(const char *path) +pgFileInit(const char *rel_path) { pgFile *file; - char *file_name; + char *file_name = NULL; file = (pgFile *) pgut_malloc(sizeof(pgFile)); + MemSet(file, 0, sizeof(pgFile)); - file->name = NULL; - - file->size = 0; - file->mode = 0; - file->read_size = 0; - file->write_size = 0; - file->crc = 0; - file->is_datafile = false; - file->linked = NULL; - file->pagemap.bitmap = NULL; - file->pagemap.bitmapsize = PageBitmapIsEmpty; - file->pagemap_isabsent = false; - file->tblspcOid = 0; - file->dbOid = 0; - file->relOid = 0; - file->segno = 0; - file->is_database = false; - file->forkName = pgut_malloc(MAXPGPATH); - file->forkName[0] = '\0'; - - file->path = pgut_malloc(strlen(path) + 1); - strcpy(file->path, path); /* enough buffer size guaranteed */ + file->rel_path = pgut_strdup(rel_path); + canonicalize_path(file->rel_path); /* Get file name from the path */ - file_name = strrchr(file->path, '/'); + file_name = last_dir_separator(file->rel_path); + if (file_name == NULL) - file->name = file->path; + file->name = file->rel_path; else { file_name++; file->name = file_name; } - file->is_cfs = false; - file->exists_in_prev = false; /* can change only in Incremental backup. */ /* Number of blocks readed during backup */ file->n_blocks = BLOCKNUM_INVALID; - file->compress_alg = NOT_DEFINED_COMPRESS; + + /* Number of blocks backed up during backup */ + file->n_headers = 0; + return file; } @@ -232,65 +225,160 @@ pgFileInit(const char *path) * If the pgFile points directory, the directory must be empty. */ void -pgFileDelete(pgFile *file) +pgFileDelete(mode_t mode, const char *full_path) { - if (S_ISDIR(file->mode)) + if (S_ISDIR(mode)) { - if (rmdir(file->path) == -1) + if (rmdir(full_path) == -1) { if (errno == ENOENT) return; else if (errno == ENOTDIR) /* could be symbolic link */ goto delete_file; - elog(ERROR, "cannot remove directory \"%s\": %s", - file->path, strerror(errno)); + elog(ERROR, "Cannot remove directory \"%s\": %s", + full_path, strerror(errno)); } return; } delete_file: - if (remove(file->path) == -1) + if (remove(full_path) == -1) { if (errno == ENOENT) return; - elog(ERROR, "cannot remove file \"%s\": %s", file->path, + elog(ERROR, "Cannot remove file \"%s\": %s", full_path, strerror(errno)); } } +/* + * Read the local file to compute its CRC. + * We cannot make decision about file decompression because + * user may ask to backup already compressed files and we should be + * obvious about it. 
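A minimal usage sketch of the helper below (the path is a placeholder):

    /* CRC32C variant; a missing file is treated as an error */
    pg_crc32 crc = pgFileGetCRC("/backup/instance/backup_label", true, false);

With missing_ok = true an absent file instead yields the finalized "empty" CRC, which presumably lets callers handle optional files without a separate existence check.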
+ */ pg_crc32 -pgFileGetCRC(const char *file_path) +pgFileGetCRC(const char *file_path, bool use_crc32c, bool missing_ok) { FILE *fp; pg_crc32 crc = 0; - char buf[1024]; - size_t len; - int errno_tmp; + char *buf; + size_t len = 0; + + INIT_FILE_CRC32(use_crc32c, crc); /* open file in binary read mode */ fp = fopen(file_path, PG_BINARY_R); if (fp == NULL) - elog(ERROR, "cannot open file \"%s\": %s", + { + if (errno == ENOENT) + { + if (missing_ok) + { + FIN_FILE_CRC32(use_crc32c, crc); + return crc; + } + } + + elog(ERROR, "Cannot open file \"%s\": %s", file_path, strerror(errno)); + } - /* calc CRC of backup file */ - INIT_CRC32C(crc); - while ((len = fread(buf, 1, sizeof(buf), fp)) == sizeof(buf)) + /* disable stdio buffering */ + setvbuf(fp, NULL, _IONBF, BUFSIZ); + buf = pgut_malloc(STDIO_BUFSIZE); + + /* calc CRC of file */ + for (;;) { if (interrupted) elog(ERROR, "interrupted during CRC calculation"); - COMP_CRC32C(crc, buf, len); + + len = fread(buf, 1, STDIO_BUFSIZE, fp); + + if (ferror(fp)) + elog(ERROR, "Cannot read \"%s\": %s", file_path, strerror(errno)); + + /* update CRC */ + COMP_FILE_CRC32(use_crc32c, crc, buf, len); + + if (feof(fp)) + break; } - errno_tmp = errno; - if (!feof(fp)) - elog(WARNING, "cannot read \"%s\": %s", file_path, - strerror(errno_tmp)); - if (len > 0) - COMP_CRC32C(crc, buf, len); - FIN_CRC32C(crc); + FIN_FILE_CRC32(use_crc32c, crc); fclose(fp); + pg_free(buf); + + return crc; +} + +/* + * Read the local file to compute its CRC. + * We cannot make decision about file decompression because + * user may ask to backup already compressed files and we should be + * obvious about it. + */ +pg_crc32 +pgFileGetCRCgz(const char *file_path, bool use_crc32c, bool missing_ok) +{ + gzFile fp; + pg_crc32 crc = 0; + int len = 0; + int err; + char *buf; + + INIT_FILE_CRC32(use_crc32c, crc); + + /* open file in binary read mode */ + fp = gzopen(file_path, PG_BINARY_R); + if (fp == NULL) + { + if (errno == ENOENT) + { + if (missing_ok) + { + FIN_FILE_CRC32(use_crc32c, crc); + return crc; + } + } + + elog(ERROR, "Cannot open file \"%s\": %s", + file_path, strerror(errno)); + } + + buf = pgut_malloc(STDIO_BUFSIZE); + + /* calc CRC of file */ + for (;;) + { + if (interrupted) + elog(ERROR, "interrupted during CRC calculation"); + + len = gzread(fp, buf, STDIO_BUFSIZE); + + if (len <= 0) + { + /* we either run into eof or error */ + if (gzeof(fp)) + break; + else + { + const char *err_str = NULL; + + err_str = gzerror(fp, &err); + elog(ERROR, "Cannot read from compressed file %s", err_str); + } + } + + /* update CRC */ + COMP_FILE_CRC32(use_crc32c, crc, buf, len); + } + + FIN_FILE_CRC32(use_crc32c, crc); + gzclose(fp); + pg_free(buf); return crc; } @@ -305,31 +393,64 @@ pgFileFree(void *file) file_ptr = (pgFile *) file; - if (file_ptr->linked) - free(file_ptr->linked); + pfree(file_ptr->linked); + pfree(file_ptr->rel_path); - if (file_ptr->forkName) - free(file_ptr->forkName); - - free(file_ptr->path); - free(file); + pfree(file); } /* Compare two pgFile with their path in ascending order of ASCII code. */ int -pgFileComparePath(const void *f1, const void *f2) +pgFileMapComparePath(const void *f1, const void *f2) +{ + page_map_entry *f1p = *(page_map_entry **)f1; + page_map_entry *f2p = *(page_map_entry **)f2; + + return strcmp(f1p->path, f2p->path); +} + +/* Compare two pgFile with their name in ascending order of ASCII code. 
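These comparators are intended for parray_qsort()/parray_bsearch(); for instance, the tablespace-link lookup later in this patch sorts and searches with the same function, so both sides agree on the key:

    parray_qsort(links, pgFileCompareName);
    pgFile **link = (pgFile **) parray_bsearch(links, dir, pgFileCompareName);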
*/ +int +pgFileCompareName(const void *f1, const void *f2) { pgFile *f1p = *(pgFile **)f1; pgFile *f2p = *(pgFile **)f2; - return strcmp(f1p->path, f2p->path); + return strcmp(f1p->name, f2p->name); } -/* Compare two pgFile with their path in descending order of ASCII code. */ +/* + * Compare two pgFile with their relative path and external_dir_num in ascending + * order of ASСII code. + */ int -pgFileComparePathDesc(const void *f1, const void *f2) +pgFileCompareRelPathWithExternal(const void *f1, const void *f2) { - return -pgFileComparePath(f1, f2); + pgFile *f1p = *(pgFile **)f1; + pgFile *f2p = *(pgFile **)f2; + int res; + + res = strcmp(f1p->rel_path, f2p->rel_path); + if (res == 0) + { + if (f1p->external_dir_num > f2p->external_dir_num) + return 1; + else if (f1p->external_dir_num < f2p->external_dir_num) + return -1; + else + return 0; + } + return res; +} + +/* + * Compare two pgFile with their rel_path and external_dir_num + * in descending order of ASCII code. + */ +int +pgFileCompareRelPathWithExternalDesc(const void *f1, const void *f2) +{ + return -pgFileCompareRelPathWithExternal(f1, f2); } /* Compare two pgFile with their linked directory path. */ @@ -358,72 +479,82 @@ pgFileCompareSize(const void *f1, const void *f2) } static int -BlackListCompare(const void *str1, const void *str2) +pgCompareString(const void *str1, const void *str2) { return strcmp(*(char **) str1, *(char **) str2); } +/* Compare two Oids */ +int +pgCompareOid(const void *f1, const void *f2) +{ + Oid *v1 = *(Oid **) f1; + Oid *v2 = *(Oid **) f2; + + if (*v1 > *v2) + return 1; + else if (*v1 < *v2) + return -1; + else + return 0;} + + +void +db_map_entry_free(void *entry) +{ + db_map_entry *m = (db_map_entry *) entry; + + free(m->datname); + free(entry); +} + /* * List files, symbolic links and directories in the directory "root" and add * pgFile objects to "files". We add "root" to "files" if add_root is true. * - * When omit_symlink is true, symbolic link is ignored and only file or - * directory llnked to will be listed. + * When follow_symlink is true, symbolic link is ignored and only file or + * directory linked to will be listed. 
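A hedged call sketch for the new signature (the PGDATA path is a placeholder and FIO_DB_HOST is assumed from the fio_location values used elsewhere in this patch):

    parray *files = parray_new();

    /* exclude service files, follow symlinks, add the root entry itself,
     * skip the log directory and hidden entries, not an external dir */
    dir_list_file(files, "/var/lib/postgresql/12/main", true, true, true,
                  false, true, 0, FIO_DB_HOST);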
*/ void -dir_list_file(parray *files, const char *root, bool exclude, bool omit_symlink, - bool add_root) +dir_list_file(parray *files, const char *root, bool exclude, bool follow_symlink, + bool add_root, bool backup_logs, bool skip_hidden, int external_dir_num, + fio_location location) { pgFile *file; - parray *black_list = NULL; - char path[MAXPGPATH]; - join_path_components(path, backup_instance_path, PG_BLACK_LIST); - /* List files with black list */ - if (root && pgdata && strcmp(root, pgdata) == 0 && fileExists(path)) + file = pgFileNew(root, "", follow_symlink, external_dir_num, location); + if (file == NULL) { - FILE *black_list_file = NULL; - char buf[MAXPGPATH * 2]; - char black_item[MAXPGPATH * 2]; - - black_list = parray_new(); - black_list_file = fopen(path, PG_BINARY_R); - - if (black_list_file == NULL) - elog(ERROR, "cannot open black_list: %s", strerror(errno)); - - while (fgets(buf, lengthof(buf), black_list_file) != NULL) - { - join_path_components(black_item, pgdata, buf); - - if (black_item[strlen(black_item) - 1] == '\n') - black_item[strlen(black_item) - 1] = '\0'; - - if (black_item[0] == '#' || black_item[0] == '\0') - continue; - - parray_append(black_list, black_item); - } - - fclose(black_list_file); - parray_qsort(black_list, BlackListCompare); + /* For external directory this is not ok */ + if (external_dir_num > 0) + elog(ERROR, "External directory is not found: \"%s\"", root); + else + return; } - file = pgFileNew(root, false); - if (file == NULL) - return; - if (!S_ISDIR(file->mode)) { - elog(WARNING, "Skip \"%s\": unexpected file format", file->path); + if (external_dir_num > 0) + elog(ERROR, " --external-dirs option \"%s\": directory or symbolic link expected", + root); + else + elog(WARNING, "Skip \"%s\": unexpected file format", root); return; } if (add_root) parray_append(files, file); - dir_list_file_internal(files, root, file, exclude, omit_symlink, black_list); + dir_list_file_internal(files, file, root, exclude, follow_symlink, + backup_logs, skip_hidden, external_dir_num, location); + + if (!add_root) + pgFileFree(file); } +#define CHECK_FALSE 0 +#define CHECK_TRUE 1 +#define CHECK_EXCLUDE_FALSE 2 + /* * Check file or directory. * @@ -432,16 +563,19 @@ dir_list_file(parray *files, const char *root, bool exclude, bool omit_symlink, * Skip files: * - skip temp tables files * - skip unlogged tables files + * Skip recursive tablespace content * Set flags for: * - database directories * - datafiles */ -static bool -dir_check_file(const char *root, pgFile *file) +static char +dir_check_file(pgFile *file, bool backup_logs) { - const char *rel_path; int i; int sscanf_res; + bool in_tablespace = false; + + in_tablespace = path_is_prefix_of_path(PG_TBLSPC_DIR, file->rel_path); /* Check if we need to exclude file by name */ if (S_ISREG(file->mode)) @@ -449,28 +583,28 @@ dir_check_file(const char *root, pgFile *file) if (!exclusive_backup) { for (i = 0; pgdata_exclude_files_non_exclusive[i]; i++) - if (strcmp(file->name, + if (strcmp(file->rel_path, pgdata_exclude_files_non_exclusive[i]) == 0) { /* Skip */ elog(VERBOSE, "Excluding file: %s", file->name); - return false; + return CHECK_FALSE; } } for (i = 0; pgdata_exclude_files[i]; i++) - if (strcmp(file->name, pgdata_exclude_files[i]) == 0) + if (strcmp(file->rel_path, pgdata_exclude_files[i]) == 0) { /* Skip */ elog(VERBOSE, "Excluding file: %s", file->name); - return false; + return CHECK_FALSE; } } /* * If the directory name is in the exclude list, do not list the * contents. 
*/ - else if (S_ISDIR(file->mode)) + else if (S_ISDIR(file->mode) && !in_tablespace && file->external_dir_num == 0) { /* * If the item in the exclude list starts with '/', compare to @@ -479,27 +613,25 @@ dir_check_file(const char *root, pgFile *file) */ for (i = 0; pgdata_exclude_dir[i]; i++) { - /* Full-path exclude*/ - if (pgdata_exclude_dir[i][0] == '/') + /* relative path exclude */ + if (strcmp(file->rel_path, pgdata_exclude_dir[i]) == 0) { - if (strcmp(file->path, pgdata_exclude_dir[i]) == 0) - { - elog(VERBOSE, "Excluding directory content: %s", - file->name); - return false; - } + elog(VERBOSE, "Excluding directory content: %s", file->rel_path); + return CHECK_EXCLUDE_FALSE; } - else if (strcmp(file->name, pgdata_exclude_dir[i]) == 0) + } + + if (!backup_logs) + { + if (strcmp(file->rel_path, PG_LOG_DIR) == 0) { - elog(VERBOSE, "Excluding directory content: %s", - file->name); - return false; + /* Skip */ + elog(VERBOSE, "Excluding directory content: %s", file->rel_path); + return CHECK_EXCLUDE_FALSE; } } } - rel_path = GetRelativePath(file->path, root); - /* * Do not copy tablespaces twice. It may happen if the tablespace is located * inside the PGDATA. @@ -514,46 +646,53 @@ dir_check_file(const char *root, pgFile *file) * Valid path for the tablespace is * pg_tblspc/tblsOid/TABLESPACE_VERSION_DIRECTORY */ - if (!path_is_prefix_of_path(PG_TBLSPC_DIR, rel_path)) - return false; - sscanf_res = sscanf(rel_path, PG_TBLSPC_DIR "/%u/%s", + if (!path_is_prefix_of_path(PG_TBLSPC_DIR, file->rel_path)) + return CHECK_FALSE; + sscanf_res = sscanf(file->rel_path, PG_TBLSPC_DIR "/%u/%s", &tblspcOid, tmp_rel_path); if (sscanf_res == 0) - return false; + return CHECK_FALSE; } - if (path_is_prefix_of_path("global", rel_path)) + if (in_tablespace) { - file->tblspcOid = GLOBALTABLESPACE_OID; + char tmp_rel_path[MAXPGPATH]; - if (S_ISDIR(file->mode) && strcmp(file->name, "global") == 0) + sscanf_res = sscanf(file->rel_path, PG_TBLSPC_DIR "/%u/%[^/]/%u/", + &(file->tblspcOid), tmp_rel_path, + &(file->dbOid)); + + /* + * We should skip other files and directories rather than + * TABLESPACE_VERSION_DIRECTORY, if this is recursive tablespace. 
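To make the sscanf pattern below concrete (all OIDs and the version directory name are made-up examples):

    rel_path = "pg_tblspc/16384/PG_12_201909212/16390"
        -> sscanf_res == 3, tblspcOid = 16384, dbOid = 16390, and for a
           directory matching TABLESPACE_VERSION_DIRECTORY is_database is set;
    rel_path = "pg_tblspc/16384/old_cluster_copy"
        -> sscanf_res == 2 with a non-matching name, i.e. a recursive
           tablespace copy, so the entry is skipped (CHECK_FALSE).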
+ */ + if (sscanf_res == 2 && strcmp(tmp_rel_path, TABLESPACE_VERSION_DIRECTORY) != 0) + return CHECK_FALSE; + + if (sscanf_res == 3 && S_ISDIR(file->mode) && + strcmp(tmp_rel_path, TABLESPACE_VERSION_DIRECTORY) == 0) file->is_database = true; } - else if (path_is_prefix_of_path("base", rel_path)) + else if (path_is_prefix_of_path("global", file->rel_path)) { - file->tblspcOid = DEFAULTTABLESPACE_OID; - - sscanf(rel_path, "base/%u/", &(file->dbOid)); + file->tblspcOid = GLOBALTABLESPACE_OID; - if (S_ISDIR(file->mode) && strcmp(file->name, "base") != 0) + if (S_ISDIR(file->mode) && strcmp(file->name, "global") == 0) file->is_database = true; } - else if (path_is_prefix_of_path(PG_TBLSPC_DIR, rel_path)) + else if (path_is_prefix_of_path("base", file->rel_path)) { - char tmp_rel_path[MAXPGPATH]; + file->tblspcOid = DEFAULTTABLESPACE_OID; - sscanf_res = sscanf(rel_path, PG_TBLSPC_DIR "/%u/%[^/]/%u/", - &(file->tblspcOid), tmp_rel_path, - &(file->dbOid)); + sscanf(file->rel_path, "base/%u/", &(file->dbOid)); - if (sscanf_res == 3 && S_ISDIR(file->mode) && - strcmp(tmp_rel_path, TABLESPACE_VERSION_DIRECTORY) == 0) + if (S_ISDIR(file->mode) && strcmp(file->name, "base") != 0) file->is_database = true; } /* Do not backup ptrack_init files */ if (S_ISREG(file->mode) && strcmp(file->name, "ptrack_init") == 0) - return false; + return CHECK_FALSE; /* * Check files located inside database directories including directory @@ -563,10 +702,17 @@ dir_check_file(const char *root, pgFile *file) file->name && file->name[0]) { if (strcmp(file->name, "pg_internal.init") == 0) - return false; + return CHECK_FALSE; + /* Do not backup ptrack2.x map files */ + else if (strcmp(file->name, "ptrack.map") == 0) + return CHECK_FALSE; + else if (strcmp(file->name, "ptrack.map.mmap") == 0) + return CHECK_FALSE; + else if (strcmp(file->name, "ptrack.map.tmp") == 0) + return CHECK_FALSE; /* Do not backup temp files */ else if (file->name[0] == 't' && isdigit(file->name[1])) - return false; + return CHECK_FALSE; else if (isdigit(file->name[0])) { char *fork_name; @@ -577,18 +723,39 @@ dir_check_file(const char *root, pgFile *file) if (fork_name) { /* Auxiliary fork of the relfile */ - sscanf(file->name, "%u_%s", &(file->relOid), file->forkName); + if (strcmp(fork_name, "vm") == 0) + file->forkName = vm; + + else if (strcmp(fork_name, "fsm") == 0) + file->forkName = fsm; + + else if (strcmp(fork_name, "cfm") == 0) + file->forkName = cfm; + + else if (strcmp(fork_name, "ptrack") == 0) + file->forkName = ptrack; + + else if (strcmp(fork_name, "init") == 0) + file->forkName = init; /* Do not backup ptrack files */ - if (strcmp(file->forkName, "ptrack") == 0) - return false; + if (file->forkName == ptrack) + return CHECK_FALSE; } else { + /* + * snapfs files: + * RELFILENODE.BLOCKNO.snapmap.SNAPID + * RELFILENODE.BLOCKNO.snap.SNAPID + */ + if (strstr(file->name, "snap") != NULL) + return true; + len = strlen(file->name); /* reloid.cfm */ if (len > 3 && strcmp(file->name + len - 3, "cfm") == 0) - return true; + return CHECK_TRUE; sscanf_res = sscanf(file->name, "%u.%d.%s", &(file->relOid), &(file->segno), suffix); @@ -600,25 +767,27 @@ dir_check_file(const char *root, pgFile *file) } } - return true; + return CHECK_TRUE; } /* - * List files in "root" directory. If "exclude" is true do not add into "files" - * files from pgdata_exclude_files and directories from pgdata_exclude_dir. + * List files in parent->path directory. 
If "exclude" is true do not add into + * "files" files from pgdata_exclude_files and directories from + * pgdata_exclude_dir. */ static void -dir_list_file_internal(parray *files, const char *root, pgFile *parent, - bool exclude, bool omit_symlink, parray *black_list) +dir_list_file_internal(parray *files, pgFile *parent, const char *parent_dir, + bool exclude, bool follow_symlink, bool backup_logs, + bool skip_hidden, int external_dir_num, fio_location location) { - DIR *dir; + DIR *dir; struct dirent *dent; if (!S_ISDIR(parent->mode)) - elog(ERROR, "\"%s\" is not a directory", parent->path); + elog(ERROR, "\"%s\" is not a directory", parent_dir); /* Open directory and list contents */ - dir = opendir(parent->path); + dir = fio_opendir(parent_dir, location); if (dir == NULL) { if (errno == ENOENT) @@ -626,19 +795,23 @@ dir_list_file_internal(parray *files, const char *root, pgFile *parent, /* Maybe the directory was removed */ return; } - elog(ERROR, "cannot open directory \"%s\": %s", - parent->path, strerror(errno)); + elog(ERROR, "Cannot open directory \"%s\": %s", + parent_dir, strerror(errno)); } errno = 0; - while ((dent = readdir(dir))) + while ((dent = fio_readdir(dir))) { pgFile *file; char child[MAXPGPATH]; + char rel_child[MAXPGPATH]; + char check_res; - join_path_components(child, parent->path, dent->d_name); + join_path_components(child, parent_dir, dent->d_name); + join_path_components(rel_child, parent->rel_path, dent->d_name); - file = pgFileNew(child, omit_symlink); + file = pgFileNew(child, rel_child, follow_symlink, external_dir_num, + location); if (file == NULL) continue; @@ -650,159 +823,61 @@ dir_list_file_internal(parray *files, const char *root, pgFile *parent, continue; } - /* - * Add only files, directories and links. Skip sockets and other - * unexpected file formats. - */ - if (!S_ISDIR(file->mode) && !S_ISREG(file->mode)) + /* skip hidden files and directories */ + if (skip_hidden && file->name[0] == '.') { - elog(WARNING, "Skip \"%s\": unexpected file format", file->path); + elog(WARNING, "Skip hidden file: '%s'", child); pgFileFree(file); continue; } - /* Skip if the directory is in black_list defined by user */ - if (black_list && parray_bsearch(black_list, file->path, - BlackListCompare)) + /* + * Add only files, directories and links. Skip sockets and other + * unexpected file formats. + */ + if (!S_ISDIR(file->mode) && !S_ISREG(file->mode)) { - elog(LOG, "Skip \"%s\": it is in the user's black list", file->path); + elog(WARNING, "Skip '%s': unexpected file format", child); pgFileFree(file); continue; } - /* We add the directory anyway */ - if (S_ISDIR(file->mode)) - parray_append(files, file); - - if (exclude && !dir_check_file(root, file)) + if (exclude) { - if (S_ISREG(file->mode)) + check_res = dir_check_file(file, backup_logs); + if (check_res == CHECK_FALSE) + { + /* Skip */ pgFileFree(file); - /* Skip */ - continue; + continue; + } + else if (check_res == CHECK_EXCLUDE_FALSE) + { + /* We add the directory itself which content was excluded */ + parray_append(files, file); + continue; + } } - /* At least add the file */ - if (S_ISREG(file->mode)) - parray_append(files, file); + parray_append(files, file); /* * If the entry is a directory call dir_list_file_internal() * recursively. 
*/ if (S_ISDIR(file->mode)) - dir_list_file_internal(files, root, file, exclude, omit_symlink, - black_list); + dir_list_file_internal(files, file, child, exclude, follow_symlink, + backup_logs, skip_hidden, external_dir_num, location); } if (errno && errno != ENOENT) { int errno_tmp = errno; - closedir(dir); - elog(ERROR, "cannot read directory \"%s\": %s", - parent->path, strerror(errno_tmp)); + fio_closedir(dir); + elog(ERROR, "Cannot read directory \"%s\": %s", + parent_dir, strerror(errno_tmp)); } - closedir(dir); -} - -/* - * List data directories excluding directories from - * pgdata_exclude_dir array. - * - * **is_root** is a little bit hack. We exclude only first level of directories - * and on the first level we check all files and directories. - */ -static void -list_data_directories(parray *files, const char *path, bool is_root, - bool exclude) -{ - DIR *dir; - struct dirent *dent; - int prev_errno; - bool has_child_dirs = false; - - /* open directory and list contents */ - dir = opendir(path); - if (dir == NULL) - elog(ERROR, "cannot open directory \"%s\": %s", path, strerror(errno)); - - errno = 0; - while ((dent = readdir(dir))) - { - char child[MAXPGPATH]; - bool skip = false; - struct stat st; - - /* skip entries point current dir or parent dir */ - if (strcmp(dent->d_name, ".") == 0 || - strcmp(dent->d_name, "..") == 0) - continue; - - join_path_components(child, path, dent->d_name); - - if (lstat(child, &st) == -1) - elog(ERROR, "cannot stat file \"%s\": %s", child, strerror(errno)); - - if (!S_ISDIR(st.st_mode)) - continue; - - /* Check for exclude for the first level of listing */ - if (is_root && exclude) - { - int i; - - for (i = 0; pgdata_exclude_dir[i]; i++) - { - if (strcmp(dent->d_name, pgdata_exclude_dir[i]) == 0) - { - skip = true; - break; - } - } - } - if (skip) - continue; - - has_child_dirs = true; - list_data_directories(files, child, false, exclude); - } - - /* List only full and last directories */ - if (!is_root && !has_child_dirs) - { - pgFile *dir; - - dir = pgFileNew(path, false); - parray_append(files, dir); - } - - prev_errno = errno; - closedir(dir); - - if (prev_errno && prev_errno != ENOENT) - elog(ERROR, "cannot read directory \"%s\": %s", - path, strerror(prev_errno)); -} - -/* - * Save create directory path into memory. We can use it in next page restore to - * not raise the error "restore tablespace destination is not empty" in - * create_data_directories(). - */ -static void -set_tablespace_created(const char *link, const char *dir) -{ - TablespaceCreatedListCell *cell = pgut_new(TablespaceCreatedListCell); - - strcpy(cell->link_name, link); - strcpy(cell->linked_dir, dir); - cell->next = NULL; - - if (tablespace_created_dirs.tail) - tablespace_created_dirs.tail->next = cell; - else - tablespace_created_dirs.head = cell; - tablespace_created_dirs.tail = cell; + fio_closedir(dir); } /* @@ -824,34 +899,21 @@ get_tablespace_mapping(const char *dir) } /* - * Is directory was created when symlink was created in restore_directories(). - */ -static const char * -get_tablespace_created(const char *link) -{ - TablespaceCreatedListCell *cell; - - for (cell = tablespace_created_dirs.head; cell; cell = cell->next) - if (strcmp(link, cell->link_name) == 0) - return cell->linked_dir; - - return NULL; -} - -/* - * Split argument into old_dir and new_dir and append to tablespace mapping + * Split argument into old_dir and new_dir and append to mapping * list. * * Copy of function tablespace_list_append() from pg_basebackup.c. 
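For example (paths are placeholders), a --tablespace-mapping argument is split by the loop below into the two halves of the mapping cell:

    arg = "/mnt/tblspc_old=/mnt/tblspc_new"
        -> cell->old_dir = "/mnt/tblspc_old"
           cell->new_dir = "/mnt/tblspc_new"

A literal '=' inside a path can be escaped as '\=', since only an unescaped '=' switches the destination buffer.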
*/ -void -opt_tablespace_map(pgut_option *opt, const char *arg) +static void +opt_path_map(ConfigOption *opt, const char *arg, TablespaceList *list, + const char *type) { TablespaceListCell *cell = pgut_new(TablespaceListCell); char *dst; char *dst_ptr; const char *arg_ptr; + memset(cell, 0, sizeof(TablespaceListCell)); dst_ptr = dst = cell->old_dir; for (arg_ptr = arg; *arg_ptr; arg_ptr++) { @@ -863,7 +925,7 @@ opt_tablespace_map(pgut_option *opt, const char *arg) else if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\')) { if (*cell->new_dir) - elog(ERROR, "multiple \"=\" signs in tablespace mapping\n"); + elog(ERROR, "multiple \"=\" signs in %s mapping\n", type); else dst = dst_ptr = cell->new_dir; } @@ -872,8 +934,10 @@ opt_tablespace_map(pgut_option *opt, const char *arg) } if (!*cell->old_dir || !*cell->new_dir) - elog(ERROR, "invalid tablespace mapping format \"%s\", " - "must be \"OLDDIR=NEWDIR\"", arg); + elog(ERROR, "invalid %s mapping format \"%s\", " + "must be \"OLDDIR=NEWDIR\"", type, arg); + canonicalize_path(cell->old_dir); + canonicalize_path(cell->new_dir); /* * This check isn't absolutely necessary. But all tablespaces are created @@ -882,167 +946,170 @@ opt_tablespace_map(pgut_option *opt, const char *arg) * consistent with the new_dir check. */ if (!is_absolute_path(cell->old_dir)) - elog(ERROR, "old directory is not an absolute path in tablespace mapping: %s\n", - cell->old_dir); + elog(ERROR, "old directory is not an absolute path in %s mapping: %s\n", + type, cell->old_dir); if (!is_absolute_path(cell->new_dir)) - elog(ERROR, "new directory is not an absolute path in tablespace mapping: %s\n", - cell->new_dir); + elog(ERROR, "new directory is not an absolute path in %s mapping: %s\n", + type, cell->new_dir); - if (tablespace_dirs.tail) - tablespace_dirs.tail->next = cell; + if (list->tail) + list->tail->next = cell; else - tablespace_dirs.head = cell; - tablespace_dirs.tail = cell; + list->head = cell; + list->tail = cell; +} + +/* Parse tablespace mapping */ +void +opt_tablespace_map(ConfigOption *opt, const char *arg) +{ + opt_path_map(opt, arg, &tablespace_dirs, "tablespace"); +} + +/* Parse external directories mapping */ +void +opt_externaldir_map(ConfigOption *opt, const char *arg) +{ + opt_path_map(opt, arg, &external_remap_list, "external directory"); } /* - * Create backup directories from **backup_dir** to **data_dir**. Doesn't raise - * an error if target directories exist. + * Create directories from **dest_files** in **data_dir**. * * If **extract_tablespaces** is true then try to extract tablespace data * directories into their initial path using tablespace_map file. + * Use **backup_dir** for tablespace_map extracting. + * + * Enforce permissions from backup_content.control. The only + * problem now is with PGDATA itself. + * TODO: we must preserve PGDATA permissions somewhere. Is it actually a problem? + * Shouldn`t starting postgres force correct permissions on PGDATA? + * + * TODO: symlink handling. If user located symlink in PG_TBLSPC_DIR, it will + * be restored as directory. 
*/ void -create_data_directories(const char *data_dir, const char *backup_dir, - bool extract_tablespaces) +create_data_directories(parray *dest_files, const char *data_dir, const char *backup_dir, + bool extract_tablespaces, bool incremental, fio_location location) { - parray *dirs, - *links = NULL; - size_t i; - char backup_database_dir[MAXPGPATH], - to_path[MAXPGPATH]; + int i; + parray *links = NULL; + mode_t pg_tablespace_mode = DIR_PERMISSION; + char to_path[MAXPGPATH]; - dirs = parray_new(); + /* get tablespace map */ if (extract_tablespaces) { links = parray_new(); read_tablespace_map(links, backup_dir); + /* Sort links by a link name */ + parray_qsort(links, pgFileCompareName); } - join_path_components(backup_database_dir, backup_dir, DATABASE_DIR); - list_data_directories(dirs, backup_database_dir, true, false); - - elog(LOG, "restore directories and symlinks..."); - - for (i = 0; i < parray_num(dirs); i++) + /* + * We have no idea about tablespace permission + * For PG < 11 we can just force default permissions. + */ +#if PG_VERSION_NUM >= 110000 + if (links) { - pgFile *dir = (pgFile *) parray_get(dirs, i); - char *relative_ptr = GetRelativePath(dir->path, backup_database_dir); + /* For PG>=11 we use temp kludge: trust permissions on 'pg_tblspc' + * and force them on every tablespace. + * TODO: remove kludge and ask data_directory_mode + * at the start of backup. + */ + for (i = 0; i < parray_num(dest_files); i++) + { + pgFile *file = (pgFile *) parray_get(dest_files, i); - Assert(S_ISDIR(dir->mode)); + if (!S_ISDIR(file->mode)) + continue; - /* Try to create symlink and linked directory if necessary */ - if (extract_tablespaces && - path_is_prefix_of_path(PG_TBLSPC_DIR, relative_ptr)) - { - char *link_ptr = GetRelativePath(relative_ptr, PG_TBLSPC_DIR), - *link_sep, - *tmp_ptr; - char link_name[MAXPGPATH]; - pgFile **link; - - /* Extract link name from relative path */ - link_sep = first_dir_separator(link_ptr); - if (link_sep != NULL) + /* skip external directory content */ + if (file->external_dir_num != 0) + continue; + + /* look for 'pg_tblspc' directory */ + if (strcmp(file->rel_path, PG_TBLSPC_DIR) == 0) { - int len = link_sep - link_ptr; - strncpy(link_name, link_ptr, len); - link_name[len] = '\0'; + pg_tablespace_mode = file->mode; + break; } - else - goto create_directory; + } + } +#endif - tmp_ptr = dir->path; - dir->path = link_name; - /* Search only by symlink name without path */ - link = (pgFile **) parray_bsearch(links, dir, pgFileComparePath); - dir->path = tmp_ptr; + /* + * We iterate over dest_files and for every directory with parent 'pg_tblspc' + * we must lookup this directory name in tablespace map. + * If we got a match, we treat this directory as tablespace. + * It means that we create directory specified in tablespace_map and + * original directory created as symlink to it. + */ - if (link) - { - const char *linked_path = get_tablespace_mapping((*link)->linked); - const char *dir_created; + elog(LOG, "Restore directories and symlinks..."); - if (!is_absolute_path(linked_path)) - elog(ERROR, "tablespace directory is not an absolute path: %s\n", - linked_path); + /* create directories */ + for (i = 0; i < parray_num(dest_files); i++) + { + char parent_dir[MAXPGPATH]; + pgFile *dir = (pgFile *) parray_get(dest_files, i); - /* Check if linked directory was created earlier */ - dir_created = get_tablespace_created(link_name); - if (dir_created) - { - /* - * If symlink and linked directory were created do not - * create it second time. 
- */ - if (strcmp(dir_created, linked_path) == 0) - { - /* - * Create rest of directories. - * First check is there any directory name after - * separator. - */ - if (link_sep != NULL && *(link_sep + 1) != '\0') - goto create_directory; - else - continue; - } - else - elog(ERROR, "tablespace directory \"%s\" of page backup does not " - "match with previous created tablespace directory \"%s\" of symlink \"%s\"", - linked_path, dir_created, link_name); - } + if (!S_ISDIR(dir->mode)) + continue; + + /* skip external directory content */ + if (dir->external_dir_num != 0) + continue; - /* - * This check was done in check_tablespace_mapping(). But do - * it again. + /* tablespace_map exists */ + if (links) + { + /* get parent dir of rel_path */ + strncpy(parent_dir, dir->rel_path, MAXPGPATH); + get_parent_directory(parent_dir); + + /* check if directory is actually link to tablespace */ + if (strcmp(parent_dir, PG_TBLSPC_DIR) == 0) + { + /* this directory located in pg_tblspc + * check it against tablespace map */ - if (!dir_is_empty(linked_path)) - elog(ERROR, "restore tablespace destination is not empty: \"%s\"", - linked_path); - - if (link_sep) - elog(LOG, "create directory \"%s\" and symbolic link \"%.*s\"", - linked_path, - (int) (link_sep - relative_ptr), relative_ptr); - else - elog(LOG, "create directory \"%s\" and symbolic link \"%s\"", - linked_path, relative_ptr); + pgFile **link = (pgFile **) parray_bsearch(links, dir, pgFileCompareName); - /* Firstly, create linked directory */ - dir_create_dir(linked_path, DIR_PERMISSION); + /* got match */ + if (link) + { + const char *linked_path = get_tablespace_mapping((*link)->linked); - join_path_components(to_path, data_dir, PG_TBLSPC_DIR); - /* Create pg_tblspc directory just in case */ - dir_create_dir(to_path, DIR_PERMISSION); + if (!is_absolute_path(linked_path)) + elog(ERROR, "Tablespace directory is not an absolute path: %s\n", + linked_path); - /* Secondly, create link */ - join_path_components(to_path, to_path, link_name); - if (symlink(linked_path, to_path) < 0) - elog(ERROR, "could not create symbolic link \"%s\": %s", - to_path, strerror(errno)); + join_path_components(to_path, data_dir, dir->rel_path); - /* Save linked directory */ - set_tablespace_created(link_name, linked_path); + elog(VERBOSE, "Create directory \"%s\" and symbolic link \"%s\"", + linked_path, to_path); - /* - * Create rest of directories. - * First check is there any directory name after separator. 
- */ - if (link_sep != NULL && *(link_sep + 1) != '\0') - goto create_directory; + /* create tablespace directory */ + fio_mkdir(linked_path, pg_tablespace_mode, location); - continue; + /* create link to linked_path */ + if (fio_symlink(linked_path, to_path, incremental, location) < 0) + elog(ERROR, "Could not create symbolic link \"%s\": %s", + to_path, strerror(errno)); + + continue; + } } } -create_directory: - elog(LOG, "create directory \"%s\"", relative_ptr); - /* This is not symlink, create directory */ - join_path_components(to_path, data_dir, relative_ptr); - dir_create_dir(to_path, DIR_PERMISSION); + elog(VERBOSE, "Create directory \"%s\"", dir->rel_path); + + join_path_components(to_path, data_dir, dir->rel_path); + fio_mkdir(to_path, dir->mode, location); } if (extract_tablespaces) @@ -1050,13 +1117,10 @@ create_data_directories(const char *data_dir, const char *backup_dir, parray_walk(links, pgFileFree); parray_free(links); } - - parray_walk(dirs, pgFileFree); - parray_free(dirs); } /* - * Read names of symbolik names of tablespaces with links to directories from + * Read names of symbolic names of tablespaces with links to directories from * tablespace_map or tablespace_map.txt. */ void @@ -1071,13 +1135,13 @@ read_tablespace_map(parray *files, const char *backup_dir) join_path_components(map_path, db_path, PG_TABLESPACE_MAP_FILE); /* Exit if database/tablespace_map doesn't exist */ - if (!fileExists(map_path)) + if (!fileExists(map_path, FIO_BACKUP_HOST)) { elog(LOG, "there is no file tablespace_map"); return; } - fp = fopen(map_path, "rt"); + fp = fio_open_stream(map_path, FIO_BACKUP_HOST); if (fp == NULL) elog(ERROR, "cannot open \"%s\": %s", map_path, strerror(errno)); @@ -1093,30 +1157,37 @@ read_tablespace_map(parray *files, const char *backup_dir) file = pgut_new(pgFile); memset(file, 0, sizeof(pgFile)); - file->path = pgut_malloc(strlen(link_name) + 1); - strcpy(file->path, link_name); - - file->linked = pgut_malloc(strlen(path) + 1); - strcpy(file->linked, path); + /* follow the convention for pgFileFree */ + file->name = pgut_strdup(link_name); + file->linked = pgut_strdup(path); + canonicalize_path(file->linked); parray_append(files, file); } - parray_qsort(files, pgFileCompareLinked); - fclose(fp); + if (ferror(fp)) + elog(ERROR, "Failed to read from file: \"%s\"", map_path); + + fio_close_stream(fp); } /* * Check that all tablespace mapping entries have correct linked directory - * paths. Linked directories must be empty or do not exist. + * paths. Linked directories must be empty or do not exist, unless + * we are running incremental restore, then linked directories can be nonempty. * * If tablespace-mapping option is supplied, all OLDDIR entries must have * entries in tablespace_map file. + * + * + * TODO: maybe when running incremental restore with tablespace remapping, then + * new tablespace directory MUST be empty? because there is no way + * we can be sure, that files laying there belong to our instance. 
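check_tablespace_mapping() below now tolerates a non-empty tablespace destination only when incremental restore is requested, and reports that through *tblspaces_are_empty. The emptiness test itself is the usual readdir() loop; a local-only sketch for illustration (the patch routes the real check through fio_opendir()/fio_readdir() so the destination may also be remote):

#include <dirent.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* A directory counts as empty when it contains nothing but "." and "..".
 * A missing directory is also treated as empty, as in dir_is_empty(). */
static bool
local_dir_is_empty(const char *path)
{
    DIR            *dir = opendir(path);
    struct dirent  *ent;

    if (dir == NULL)
        return true;                /* directory does not exist */

    while ((ent = readdir(dir)) != NULL)
    {
        if (strcmp(ent->d_name, ".") == 0 ||
            strcmp(ent->d_name, "..") == 0)
            continue;

        closedir(dir);
        return false;               /* found a real entry */
    }

    closedir(dir);
    return true;
}

int
main(void)
{
    printf("/tmp empty? %s\n", local_dir_is_empty("/tmp") ? "yes" : "no");
    return 0;
}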
*/ void -check_tablespace_mapping(pgBackup *backup) +check_tablespace_mapping(pgBackup *backup, bool incremental, bool *tblspaces_are_empty) { - char this_backup_path[MAXPGPATH]; +// char this_backup_path[MAXPGPATH]; parray *links; size_t i; TablespaceListCell *cell; @@ -1124,12 +1195,13 @@ check_tablespace_mapping(pgBackup *backup) links = parray_new(); - pgBackupGetPath(backup, this_backup_path, lengthof(this_backup_path), NULL); - read_tablespace_map(links, this_backup_path); +// pgBackupGetPath(backup, this_backup_path, lengthof(this_backup_path), NULL); + read_tablespace_map(links, backup->root_dir); + /* Sort links by the path of a linked file*/ + parray_qsort(links, pgFileCompareLinked); - if (log_level_console <= LOG || log_level_file <= LOG) - elog(LOG, "check tablespace directories of backup %s", - base36enc(backup->start_time)); + elog(LOG, "check tablespace directories of backup %s", + base36enc(backup->start_time)); /* 1 - each OLDDIR must have an entry in tablespace_map file (links) */ for (cell = tablespace_dirs.head; cell; cell = cell->next) @@ -1140,6 +1212,18 @@ check_tablespace_mapping(pgBackup *backup) elog(ERROR, "--tablespace-mapping option's old directory " "doesn't have an entry in tablespace_map file: \"%s\"", cell->old_dir); + + /* For incremental restore, check that new directory is empty */ +// if (incremental) +// { +// if (!is_absolute_path(cell->new_dir)) +// elog(ERROR, "tablespace directory is not an absolute path: %s\n", +// cell->new_dir); +// +// if (!dir_is_empty(cell->new_dir, FIO_DB_HOST)) +// elog(ERROR, "restore tablespace destination is not empty: \"%s\"", +// cell->new_dir); +// } } /* 2 - all linked directories must be empty */ @@ -1160,9 +1244,13 @@ check_tablespace_mapping(pgBackup *backup) elog(ERROR, "tablespace directory is not an absolute path: %s\n", linked_path); - if (!dir_is_empty(linked_path)) - elog(ERROR, "restore tablespace destination is not empty: \"%s\"", - linked_path); + if (!dir_is_empty(linked_path, FIO_DB_HOST)) + { + if (!incremental) + elog(ERROR, "restore tablespace destination is not empty: \"%s\"", + linked_path); + *tblspaces_are_empty = false; + } } free(tmp_file); @@ -1170,47 +1258,81 @@ check_tablespace_mapping(pgBackup *backup) parray_free(links); } -/* - * Print backup content list. 
- */ void -print_file_list(FILE *out, const parray *files, const char *root) +check_external_dir_mapping(pgBackup *backup, bool incremental) { - size_t i; + TablespaceListCell *cell; + parray *external_dirs_to_restore; + int i; - /* print each file in the list */ - for (i = 0; i < parray_num(files); i++) + elog(LOG, "check external directories of backup %s", + base36enc(backup->start_time)); + + if (!backup->external_dir_str) { - pgFile *file = (pgFile *) parray_get(files, i); - char *path = file->path; + if (external_remap_list.head) + elog(ERROR, "--external-mapping option's old directory doesn't " + "have an entry in list of external directories of current " + "backup: \"%s\"", external_remap_list.head->old_dir); + return; + } - /* omit root directory portion */ - if (root && strstr(path, root) == path) - path = GetRelativePath(path, root); + external_dirs_to_restore = make_external_directory_list( + backup->external_dir_str, + false); + /* 1 - each OLDDIR must have an entry in external_dirs_to_restore */ + for (cell = external_remap_list.head; cell; cell = cell->next) + { + bool found = false; - fprintf(out, "{\"path\":\"%s\", \"size\":\"" INT64_FORMAT "\", " - "\"mode\":\"%u\", \"is_datafile\":\"%u\", " - "\"is_cfs\":\"%u\", \"crc\":\"%u\", " - "\"compress_alg\":\"%s\"", - path, file->write_size, file->mode, - file->is_datafile ? 1 : 0, file->is_cfs ? 1 : 0, file->crc, - deparse_compress_alg(file->compress_alg)); + for (i = 0; i < parray_num(external_dirs_to_restore); i++) + { + char *external_dir = parray_get(external_dirs_to_restore, i); - if (file->is_datafile) - fprintf(out, ",\"segno\":\"%d\"", file->segno); + if (strcmp(cell->old_dir, external_dir) == 0) + { + /* Swap new dir name with old one, it is used by 2-nd step */ + parray_set(external_dirs_to_restore, i, + pgut_strdup(cell->new_dir)); + pfree(external_dir); -#ifndef WIN32 - if (S_ISLNK(file->mode)) -#else - if (pgwin32_is_junction(file->path)) -#endif - fprintf(out, ",\"linked\":\"%s\"", file->linked); + found = true; + break; + } + } + if (!found) + elog(ERROR, "--external-mapping option's old directory doesn't " + "have an entry in list of external directories of current " + "backup: \"%s\"", cell->old_dir); + } + + /* 2 - all linked directories must be empty */ + for (i = 0; i < parray_num(external_dirs_to_restore); i++) + { + char *external_dir = (char *) parray_get(external_dirs_to_restore, + i); - if (file->n_blocks != BLOCKNUM_INVALID) - fprintf(out, ",\"n_blocks\":\"%i\"", file->n_blocks); + if (!incremental && !dir_is_empty(external_dir, FIO_DB_HOST)) + elog(ERROR, "External directory is not empty: \"%s\"", + external_dir); + } + + free_dir_list(external_dirs_to_restore); +} - fprintf(out, "}\n"); +char * +get_external_remap(char *current_dir) +{ + TablespaceListCell *cell; + + for (cell = external_remap_list.head; cell; cell = cell->next) + { + char *old_dir = cell->old_dir; + + if (strcmp(old_dir, current_dir) == 0) + return cell->new_dir; } + return current_dir; } /* Parsing states for get_control_value() */ @@ -1303,7 +1425,13 @@ get_control_value(const char *str, const char *name, *buf_int64_ptr = '\0'; if (!parse_int64(buf_int64, value_int64, 0)) - goto bad_format; + { + /* We assume that too big value is -1 */ + if (errno == ERANGE) + *value_int64 = BYTES_INVALID; + else + goto bad_format; + } } return true; @@ -1355,34 +1483,49 @@ get_control_value(const char *str, const char *name, * If root is not NULL, path will be absolute path. 
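Each line of backup_content.control that dir_read_file_list() (below) consumes is a single record of quoted key/value pairs in the form written by the removed print_file_list() above, e.g. {"path":"base/1/1234", "size":"8192", "mode":"33184"}. get_control_value() extracts one named field per call and, as of this patch, maps an out-of-range integer to BYTES_INVALID instead of failing. A much-simplified standalone field extractor, for illustration only (extract_control_field() is a hypothetical name; the real parser is a small state machine):

#include <stdio.h>
#include <string.h>

/* Pull the quoted value of "name" out of one control-file line.
 * Returns 0 on success, -1 if the field is absent. */
static int
extract_control_field(const char *line, const char *name,
                      char *value, size_t len)
{
    char        key[64];
    const char *p;
    size_t      n = 0;

    snprintf(key, sizeof(key), "\"%s\":\"", name);

    p = strstr(line, key);
    if (p == NULL)
        return -1;

    p += strlen(key);
    while (*p && *p != '"' && n < len - 1)
        value[n++] = *p++;
    value[n] = '\0';

    return 0;
}

int
main(void)
{
    const char *line =
        "{\"path\":\"base/1/1234\", \"size\":\"8192\", \"mode\":\"33184\"}\n";
    char        path[1024];

    if (extract_control_field(line, "path", path, sizeof(path)) == 0)
        printf("path = %s\n", path);

    return 0;
}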
*/ parray * -dir_read_file_list(const char *root, const char *file_txt) +dir_read_file_list(const char *root, const char *external_prefix, + const char *file_txt, fio_location location, pg_crc32 expected_crc) { - FILE *fp; - parray *files; - char buf[MAXPGPATH * 2]; + FILE *fp; + parray *files; + char buf[BLCKSZ]; + char stdio_buf[STDIO_BUFSIZE]; + pg_crc32 content_crc = 0; - fp = fopen(file_txt, "rt"); + fp = fio_open_stream(file_txt, location); if (fp == NULL) - elog(errno == ENOENT ? ERROR : ERROR, - "cannot open \"%s\": %s", file_txt, strerror(errno)); + elog(ERROR, "cannot open \"%s\": %s", file_txt, strerror(errno)); + + /* enable stdio buffering for local file */ + if (!fio_is_remote(location)) + setvbuf(fp, stdio_buf, _IOFBF, STDIO_BUFSIZE); files = parray_new(); + INIT_FILE_CRC32(true, content_crc); + while (fgets(buf, lengthof(buf), fp)) { char path[MAXPGPATH]; - char filepath[MAXPGPATH]; char linked[MAXPGPATH]; char compress_alg_string[MAXPGPATH]; int64 write_size, mode, /* bit length of mode_t depends on platforms */ is_datafile, is_cfs, + external_dir_num, crc, segno, - n_blocks; + n_blocks, + n_headers, + dbOid, /* used for partial restore */ + hdr_crc, + hdr_off, + hdr_size; pgFile *file; + COMP_FILE_CRC32(true, content_crc, buf, strlen(buf)); + get_control_value(buf, "path", path, NULL, true); get_control_value(buf, "size", NULL, &write_size, true); get_control_value(buf, "mode", NULL, &mode, true); @@ -1390,27 +1533,28 @@ dir_read_file_list(const char *root, const char *file_txt) get_control_value(buf, "is_cfs", NULL, &is_cfs, false); get_control_value(buf, "crc", NULL, &crc, true); get_control_value(buf, "compress_alg", compress_alg_string, NULL, false); + get_control_value(buf, "external_dir_num", NULL, &external_dir_num, false); + get_control_value(buf, "dbOid", NULL, &dbOid, false); - if (root) - join_path_components(filepath, root, path); - else - strcpy(filepath, path); - - file = pgFileInit(filepath); - + file = pgFileInit(path); file->write_size = (int64) write_size; file->mode = (mode_t) mode; file->is_datafile = is_datafile ? true : false; file->is_cfs = is_cfs ? true : false; file->crc = (pg_crc32) crc; file->compress_alg = parse_compress_alg(compress_alg_string); + file->external_dir_num = external_dir_num; + file->dbOid = dbOid ? dbOid : 0; /* * Optional fields */ if (get_control_value(buf, "linked", linked, NULL, false) && linked[0]) + { file->linked = pgut_strdup(linked); + canonicalize_path(file->linked); + } if (get_control_value(buf, "segno", NULL, &segno, false)) file->segno = (int) segno; @@ -1418,10 +1562,36 @@ dir_read_file_list(const char *root, const char *file_txt) if (get_control_value(buf, "n_blocks", NULL, &n_blocks, false)) file->n_blocks = (int) n_blocks; + if (get_control_value(buf, "n_headers", NULL, &n_headers, false)) + file->n_headers = (int) n_headers; + + if (get_control_value(buf, "hdr_crc", NULL, &hdr_crc, false)) + file->hdr_crc = (pg_crc32) hdr_crc; + + if (get_control_value(buf, "hdr_off", NULL, &hdr_off, false)) + file->hdr_off = hdr_off; + + if (get_control_value(buf, "hdr_size", NULL, &hdr_size, false)) + file->hdr_size = (int) hdr_size; + parray_append(files, file); } - fclose(fp); + FIN_FILE_CRC32(true, content_crc); + + if (ferror(fp)) + elog(ERROR, "Failed to read from file: \"%s\"", file_txt); + + fio_close_stream(fp); + + if (expected_crc != 0 && + expected_crc != content_crc) + { + elog(WARNING, "Invalid CRC of backup control file '%s': %u. 
Expected: %u", + file_txt, content_crc, expected_crc); + return NULL; + } + return files; } @@ -1429,12 +1599,12 @@ dir_read_file_list(const char *root, const char *file_txt) * Check if directory empty. */ bool -dir_is_empty(const char *path) +dir_is_empty(const char *path, fio_location location) { DIR *dir; struct dirent *dir_ent; - dir = opendir(path); + dir = fio_opendir(path, location); if (dir == NULL) { /* Directory in path doesn't exist */ @@ -1444,7 +1614,7 @@ dir_is_empty(const char *path) } errno = 0; - while ((dir_ent = readdir(dir))) + while ((dir_ent = fio_readdir(dir))) { /* Skip entries point current dir or parent dir */ if (strcmp(dir_ent->d_name, ".") == 0 || @@ -1452,13 +1622,13 @@ dir_is_empty(const char *path) continue; /* Directory is not empty */ - closedir(dir); + fio_closedir(dir); return false; } if (errno) elog(ERROR, "cannot read directory \"%s\": %s", path, strerror(errno)); - closedir(dir); + fio_closedir(dir); return true; } @@ -1467,11 +1637,11 @@ dir_is_empty(const char *path) * Return true if the path is a existing regular file. */ bool -fileExists(const char *path) +fileExists(const char *path, fio_location location) { struct stat buf; - if (stat(path, &buf) == -1 && errno == ENOENT) + if (fio_stat(path, &buf, true, location) == -1 && errno == ENOENT) return false; else if (!S_ISREG(buf.st_mode)) return false; @@ -1489,3 +1659,194 @@ pgFileSize(const char *path) return buf.st_size; } + +/* + * Construct parray containing remapped external directories paths + * from string like /path1:/path2 + */ +parray * +make_external_directory_list(const char *colon_separated_dirs, bool remap) +{ + char *p; + parray *list = parray_new(); + char *tmp = pg_strdup(colon_separated_dirs); + +#ifndef WIN32 +#define EXTERNAL_DIRECTORY_DELIMITER ":" +#else +#define EXTERNAL_DIRECTORY_DELIMITER ";" +#endif + + p = strtok(tmp, EXTERNAL_DIRECTORY_DELIMITER); + while(p!=NULL) + { + char *external_path = pg_strdup(p); + + canonicalize_path(external_path); + if (is_absolute_path(external_path)) + { + if (remap) + { + char *full_path = get_external_remap(external_path); + + if (full_path != external_path) + { + full_path = pg_strdup(full_path); + pfree(external_path); + external_path = full_path; + } + } + parray_append(list, external_path); + } + else + elog(ERROR, "External directory \"%s\" is not an absolute path", + external_path); + + p = strtok(NULL, EXTERNAL_DIRECTORY_DELIMITER); + } + pfree(tmp); + parray_qsort(list, pgCompareString); + return list; +} + +/* Free memory of parray containing strings */ +void +free_dir_list(parray *list) +{ + parray_walk(list, pfree); + parray_free(list); +} + +/* Append to string "path_prefix" int "dir_num" */ +void +makeExternalDirPathByNum(char *ret_path, const char *path_prefix, const int dir_num) +{ + sprintf(ret_path, "%s%d", path_prefix, dir_num); +} + +/* Check if "dir" presents in "dirs_list" */ +bool +backup_contains_external(const char *dir, parray *dirs_list) +{ + void *search_result; + + if (!dirs_list) /* There is no external dirs in backup */ + return false; + search_result = parray_bsearch(dirs_list, dir, pgCompareString); + return search_result != NULL; +} + +/* + * Print database_map + */ +void +print_database_map(FILE *out, parray *database_map) +{ + int i; + + for (i = 0; i < parray_num(database_map); i++) + { + db_map_entry *db_entry = (db_map_entry *) parray_get(database_map, i); + + fio_fprintf(out, "{\"dbOid\":\"%u\", \"datname\":\"%s\"}\n", + db_entry->dbOid, db_entry->datname); + } + +} + +/* + * Create file 
'database_map' and add its meta to backup_files_list + * NULL check for database_map must be done by the caller. + */ +void +write_database_map(pgBackup *backup, parray *database_map, parray *backup_files_list) +{ + FILE *fp; + pgFile *file; + char database_dir[MAXPGPATH]; + char database_map_path[MAXPGPATH]; + + join_path_components(database_dir, backup->root_dir, DATABASE_DIR); + join_path_components(database_map_path, database_dir, DATABASE_MAP); + + fp = fio_fopen(database_map_path, PG_BINARY_W, FIO_BACKUP_HOST); + if (fp == NULL) + elog(ERROR, "Cannot open database map \"%s\": %s", database_map_path, + strerror(errno)); + + print_database_map(fp, database_map); + if (fio_fflush(fp) || fio_fclose(fp)) + { + fio_unlink(database_map_path, FIO_BACKUP_HOST); + elog(ERROR, "Cannot write database map \"%s\": %s", + database_map_path, strerror(errno)); + } + + /* Add metadata to backup_content.control */ + file = pgFileNew(database_map_path, DATABASE_MAP, true, 0, + FIO_BACKUP_HOST); + file->crc = pgFileGetCRC(database_map_path, true, false); + file->write_size = file->size; + file->uncompressed_size = file->read_size; + + parray_append(backup_files_list, file); +} + +/* + * read database map, return NULL if database_map in empty or missing + */ +parray * +read_database_map(pgBackup *backup) +{ + FILE *fp; + parray *database_map; + char buf[MAXPGPATH]; + char path[MAXPGPATH]; + char database_map_path[MAXPGPATH]; + +// pgBackupGetPath(backup, path, lengthof(path), DATABASE_DIR); + join_path_components(path, backup->root_dir, DATABASE_DIR); + join_path_components(database_map_path, path, DATABASE_MAP); + + fp = fio_open_stream(database_map_path, FIO_BACKUP_HOST); + if (fp == NULL) + { + /* It is NOT ok for database_map to be missing at this point, so + * we should error here. + * It`s a job of the caller to error if database_map is not empty. + */ + elog(ERROR, "Cannot open \"%s\": %s", database_map_path, strerror(errno)); + } + + database_map = parray_new(); + + while (fgets(buf, lengthof(buf), fp)) + { + char datname[MAXPGPATH]; + int64 dbOid; + + db_map_entry *db_entry = (db_map_entry *) pgut_malloc(sizeof(db_map_entry)); + + get_control_value(buf, "dbOid", NULL, &dbOid, true); + get_control_value(buf, "datname", datname, NULL, true); + + db_entry->dbOid = dbOid; + db_entry->datname = pgut_strdup(datname); + + parray_append(database_map, db_entry); + } + + if (ferror(fp)) + elog(ERROR, "Failed to read from file: \"%s\"", database_map_path); + + fio_close_stream(fp); + + /* Return NULL if file is empty */ + if (parray_num(database_map) == 0) + { + parray_free(database_map); + return NULL; + } + + return database_map; +} diff --git a/src/fetch.c b/src/fetch.c index 0d4dbdaaf..bef30dac6 100644 --- a/src/fetch.c +++ b/src/fetch.c @@ -3,23 +3,15 @@ * fetch.c * Functions for fetching files from PostgreSQL data directory * - * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ -#include "postgres_fe.h" - -#include "catalog/catalog.h" +#include "pg_probackup.h" -#include #include -#include -#include #include -#include - -#include "pg_probackup.h" /* * Read a file into memory. The file to be read is /. 
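The slurpFile() hunk that follows swaps the plain open()/fstat()/read() calls for their fio_* counterparts so the file can be fetched from a remote data directory. The local-only pattern being generalized is roughly the following standalone sketch (illustrative only; read_whole_file() is a hypothetical name, not part of the patch):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

/* Read a whole file into a NUL-terminated malloc'd buffer; NULL on error. */
static char *
read_whole_file(const char *path, size_t *filesize)
{
    int         fd;
    struct stat st;
    char       *buffer;
    ssize_t     len;

    if ((fd = open(path, O_RDONLY)) == -1)
        return NULL;

    if (fstat(fd, &st) < 0)
    {
        close(fd);
        return NULL;
    }

    buffer = malloc(st.st_size + 1);
    if (buffer == NULL)
    {
        close(fd);
        return NULL;
    }

    len = read(fd, buffer, st.st_size);
    close(fd);

    if (len != st.st_size)
    {
        free(buffer);
        return NULL;
    }

    buffer[len] = '\0';             /* zero-terminate, as slurpFile() does */
    if (filesize)
        *filesize = (size_t) len;

    return buffer;
}

int
main(void)
{
    size_t  size;
    char   *data = read_whole_file("/etc/hostname", &size);

    if (data != NULL)
    {
        printf("read %zu bytes\n", size);
        free(data);
    }
    return 0;
}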
@@ -33,47 +25,47 @@ * */ char * -slurpFile(const char *datadir, const char *path, size_t *filesize, bool safe) +slurpFile(const char *datadir, const char *path, size_t *filesize, bool safe, fio_location location) { int fd; char *buffer; struct stat statbuf; char fullpath[MAXPGPATH]; int len; - snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path); - if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) == -1) + join_path_components(fullpath, datadir, path); + + if ((fd = fio_open(fullpath, O_RDONLY | PG_BINARY, location)) == -1) { if (safe) return NULL; else - elog(ERROR, "could not open file \"%s\" for reading: %s", + elog(ERROR, "Could not open file \"%s\" for reading: %s", fullpath, strerror(errno)); } - if (fstat(fd, &statbuf) < 0) + if (fio_stat(fullpath, &statbuf, true, location) < 0) { if (safe) return NULL; else - elog(ERROR, "could not open file \"%s\" for reading: %s", + elog(ERROR, "Could not stat file \"%s\": %s", fullpath, strerror(errno)); } len = statbuf.st_size; - buffer = pg_malloc(len + 1); - if (read(fd, buffer, len) != len) + if (fio_read(fd, buffer, len) != len) { if (safe) return NULL; else - elog(ERROR, "could not read file \"%s\": %s\n", + elog(ERROR, "Could not read file \"%s\": %s\n", fullpath, strerror(errno)); } - close(fd); + fio_close(fd); /* Zero-terminate the buffer. */ buffer[len] = '\0'; diff --git a/src/help.c b/src/help.c index 9ad6cc2ef..2b5bcd06e 100644 --- a/src/help.c +++ b/src/help.c @@ -2,10 +2,11 @@ * * help.c * - * Copyright (c) 2017-2017, Postgres Professional + * Copyright (c) 2017-2019, Postgres Professional * *------------------------------------------------------------------------- */ + #include "pg_probackup.h" static void help_init(void); @@ -15,12 +16,14 @@ static void help_validate(void); static void help_show(void); static void help_delete(void); static void help_merge(void); +static void help_set_backup(void); static void help_set_config(void); static void help_show_config(void); static void help_add_instance(void); static void help_del_instance(void); static void help_archive_push(void); static void help_archive_get(void); +static void help_checkdb(void); void help_command(char *command) @@ -39,6 +42,8 @@ help_command(char *command) help_delete(); else if (strcmp(command, "merge") == 0) help_merge(); + else if (strcmp(command, "set-backup") == 0) + help_set_backup(); else if (strcmp(command, "set-config") == 0) help_set_config(); else if (strcmp(command, "show-config") == 0) @@ -51,6 +56,8 @@ help_command(char *command) help_archive_push(); else if (strcmp(command, "archive-get") == 0) help_archive_get(); + else if (strcmp(command, "checkdb") == 0) + help_checkdb(); else if (strcmp(command, "--help") == 0 || strcmp(command, "help") == 0 || strcmp(command, "-?") == 0 @@ -74,7 +81,9 @@ help_pg_probackup(void) printf(_("\n %s init -B backup-path\n"), PROGRAM_NAME); - printf(_("\n %s set-config -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); + printf(_("\n %s set-config -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path]\n")); + printf(_(" [--external-dirs=external-directories-paths]\n")); printf(_(" [--log-level-console=log-level-console]\n")); printf(_(" [--log-level-file=log-level-file]\n")); printf(_(" [--log-filename=log-filename]\n")); @@ -84,21 +93,34 @@ help_pg_probackup(void) printf(_(" [--log-rotation-age=log-rotation-age]\n")); printf(_(" [--retention-redundancy=retention-redundancy]\n")); printf(_(" [--retention-window=retention-window]\n")); + printf(_(" 
[--wal-depth=wal-depth]\n")); printf(_(" [--compress-algorithm=compress-algorithm]\n")); printf(_(" [--compress-level=compress-level]\n")); - printf(_(" [-d dbname] [-h host] [-p port] [-U username]\n")); - printf(_(" [--master-db=db_name] [--master-host=host_name]\n")); - printf(_(" [--master-port=port] [--master-user=user_name]\n")); - printf(_(" [--replica-timeout=timeout]\n")); printf(_(" [--archive-timeout=timeout]\n")); - - printf(_("\n %s show-config -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-d dbname] [-h host] [-p port] [-U username]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--restore-command=cmdline] [--archive-host=destination]\n")); + printf(_(" [--archive-port=port] [--archive-user=username]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s set-backup -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" -i backup-id [--ttl=interval] [--expire-time=timestamp]\n")); + printf(_(" [--note=text]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s show-config -B backup-path --instance=instance_name\n"), PROGRAM_NAME); printf(_(" [--format=format]\n")); + printf(_(" [--help]\n")); printf(_("\n %s backup -B backup-path -b backup-mode --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" [-C] [--stream [-S slot-name]] [--backup-pg-log]\n")); - printf(_(" [-j num-threads] [--archive-timeout=archive-timeout]\n")); - printf(_(" [--progress]\n")); + printf(_(" [-D pgdata-path] [-C]\n")); + printf(_(" [--stream [-S slot-name]] [--temp-slot]\n")); + printf(_(" [--backup-pg-log] [-j num-threads] [--progress]\n")); + printf(_(" [--no-validate] [--skip-block-validation]\n")); + printf(_(" [--external-dirs=external-directories-paths]\n")); + printf(_(" [--no-sync]\n")); printf(_(" [--log-level-console=log-level-console]\n")); printf(_(" [--log-level-file=log-level-file]\n")); printf(_(" [--log-filename=log-filename]\n")); @@ -106,57 +128,116 @@ help_pg_probackup(void) printf(_(" [--log-directory=log-directory]\n")); printf(_(" [--log-rotation-size=log-rotation-size]\n")); printf(_(" [--log-rotation-age=log-rotation-age]\n")); - printf(_(" [--delete-expired] [--delete-wal]\n")); + printf(_(" [--delete-expired] [--delete-wal] [--merge-expired]\n")); printf(_(" [--retention-redundancy=retention-redundancy]\n")); printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n")); printf(_(" [--compress]\n")); printf(_(" [--compress-algorithm=compress-algorithm]\n")); printf(_(" [--compress-level=compress-level]\n")); + printf(_(" [--archive-timeout=archive-timeout]\n")); printf(_(" [-d dbname] [-h host] [-p port] [-U username]\n")); printf(_(" [-w --no-password] [-W --password]\n")); - printf(_(" [--master-db=db_name] [--master-host=host_name]\n")); - printf(_(" [--master-port=port] [--master-user=user_name]\n")); - printf(_(" [--replica-timeout=timeout]\n")); - - printf(_("\n %s restore -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" [-D pgdata-dir] [-i backup-id] [--progress]\n")); - printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n")); - printf(_(" [--timeline=timeline] [-T OLDDIR=NEWDIR]\n")); - printf(_(" [--immediate] [--recovery-target-name=target-name]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" 
[--ttl=interval] [--expire-time=timestamp] [--note=text]\n")); + printf(_(" [--help]\n")); + + + printf(_("\n %s restore -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path] [-i backup-id] [-j num-threads]\n")); + printf(_(" [--recovery-target-time=time|--recovery-target-xid=xid\n")); + printf(_(" |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]]\n")); + printf(_(" [--recovery-target-timeline=timeline]\n")); + printf(_(" [--recovery-target=immediate|latest]\n")); + printf(_(" [--recovery-target-name=target-name]\n")); printf(_(" [--recovery-target-action=pause|promote|shutdown]\n")); - printf(_(" [--restore-as-replica]\n")); - printf(_(" [--no-validate]\n")); - - printf(_("\n %s validate -B backup-dir [--instance=instance_name]\n"), PROGRAM_NAME); - printf(_(" [-i backup-id] [--progress]\n")); - printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n")); + printf(_(" [--restore-command=cmdline]\n")); + printf(_(" [-R | --restore-as-replica] [--force]\n")); + printf(_(" [--primary-conninfo=primary_conninfo]\n")); + printf(_(" [-S | --primary-slot-name=slotname]\n")); + printf(_(" [--no-validate] [--skip-block-validation]\n")); + printf(_(" [-T OLDDIR=NEWDIR] [--progress]\n")); + printf(_(" [--external-mapping=OLDDIR=NEWDIR]\n")); + printf(_(" [--skip-external-dirs] [--no-sync]\n")); + printf(_(" [-I | --incremental-mode=none|checksum|lsn]\n")); + printf(_(" [--db-include | --db-exclude]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--archive-host=hostname]\n")); + printf(_(" [--archive-port=port] [--archive-user=username]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s validate -B backup-path [--instance=instance_name]\n"), PROGRAM_NAME); + printf(_(" [-i backup-id] [--progress] [-j num-threads]\n")); + printf(_(" [--recovery-target-time=time|--recovery-target-xid=xid\n")); + printf(_(" |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]]\n")); + printf(_(" [--recovery-target-timeline=timeline]\n")); printf(_(" [--recovery-target-name=target-name]\n")); - printf(_(" [--timeline=timeline]\n")); + printf(_(" [--skip-block-validation]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s checkdb [-B backup-path] [--instance=instance_name]\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path] [--progress] [-j num-threads]\n")); + printf(_(" [--amcheck] [--skip-block-validation]\n")); + printf(_(" [--heapallindexed]\n")); + printf(_(" [--help]\n")); - printf(_("\n %s show -B backup-dir\n"), PROGRAM_NAME); + printf(_("\n %s show -B backup-path\n"), PROGRAM_NAME); printf(_(" [--instance=instance_name [-i backup-id]]\n")); - printf(_(" [--format=format]\n")); + printf(_(" [--format=format] [--archive]\n")); + printf(_(" [--help]\n")); - printf(_("\n %s delete -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" [--wal] [-i backup-id | --expired]\n")); - printf(_("\n %s merge -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" -i backup-id\n")); + printf(_("\n %s delete -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-j num-threads] [--progress]\n")); + printf(_(" [--retention-redundancy=retention-redundancy]\n")); + printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n")); + printf(_(" [-i backup-id | --delete-expired | --merge-expired | --status=backup_status]\n")); + printf(_(" 
[--delete-wal]\n")); + printf(_(" [--dry-run]\n")); + printf(_(" [--help]\n")); - printf(_("\n %s add-instance -B backup-dir -D pgdata-dir\n"), PROGRAM_NAME); + printf(_("\n %s merge -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" -i backup-id [--progress] [-j num-threads]\n")); + printf(_(" [--help]\n")); + + printf(_("\n %s add-instance -B backup-path -D pgdata-path\n"), PROGRAM_NAME); printf(_(" --instance=instance_name\n")); + printf(_(" [--external-dirs=external-directories-paths]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--help]\n")); - printf(_("\n %s del-instance -B backup-dir\n"), PROGRAM_NAME); + printf(_("\n %s del-instance -B backup-path\n"), PROGRAM_NAME); printf(_(" --instance=instance_name\n")); + printf(_(" [--help]\n")); - printf(_("\n %s archive-push -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" --wal-file-path=wal-file-path\n")); + printf(_("\n %s archive-push -B backup-path --instance=instance_name\n"), PROGRAM_NAME); printf(_(" --wal-file-name=wal-file-name\n")); - printf(_(" [--compress [--compress-level=compress-level]]\n")); - printf(_(" [--overwrite]\n")); + printf(_(" [-j num-threads] [--batch-size=batch_size]\n")); + printf(_(" [--archive-timeout=timeout]\n")); + printf(_(" [--no-ready-rename] [--no-sync]\n")); + printf(_(" [--overwrite] [--compress]\n")); + printf(_(" [--compress-algorithm=compress-algorithm]\n")); + printf(_(" [--compress-level=compress-level]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--help]\n")); - printf(_("\n %s archive-get -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); + printf(_("\n %s archive-get -B backup-path --instance=instance_name\n"), PROGRAM_NAME); printf(_(" --wal-file-path=wal-file-path\n")); printf(_(" --wal-file-name=wal-file-name\n")); + printf(_(" [-j num-threads] [--batch-size=batch_size]\n")); + printf(_(" [--no-validate-wal]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--help]\n")); if ((PROGRAM_URL || PROGRAM_EMAIL)) { @@ -172,17 +253,20 @@ help_pg_probackup(void) static void help_init(void) { - printf(_("%s init -B backup-path\n\n"), PROGRAM_NAME); - printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_("\n%s init -B backup-path\n\n"), PROGRAM_NAME); + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n\n")); } static void help_backup(void) { - printf(_("%s backup -B backup-path -b backup-mode --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" [-C] [--stream [-S slot-name]] [--backup-pg-log]\n")); - printf(_(" [-j num-threads] [--archive-timeout=archive-timeout]\n")); - printf(_(" [--progress]\n")); + printf(_("\n%s backup -B backup-path -b backup-mode --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path] [-C]\n")); + printf(_(" [--stream [-S slot-name] [--temp-slot]\n")); + printf(_(" [--backup-pg-log] [-j num-threads] [--progress]\n")); + printf(_(" [--no-validate] [--skip-block-validation]\n")); + printf(_(" [-E external-directories-paths]\n")); + printf(_(" [--no-sync]\n")); printf(_(" [--log-level-console=log-level-console]\n")); printf(_(" 
[--log-level-file=log-level-file]\n")); printf(_(" [--log-filename=log-filename]\n")); @@ -190,28 +274,40 @@ help_backup(void) printf(_(" [--log-directory=log-directory]\n")); printf(_(" [--log-rotation-size=log-rotation-size]\n")); printf(_(" [--log-rotation-age=log-rotation-age]\n")); - printf(_(" [--delete-expired] [--delete-wal]\n")); + printf(_(" [--delete-expired] [--delete-wal] [--merge-expired]\n")); printf(_(" [--retention-redundancy=retention-redundancy]\n")); printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n")); printf(_(" [--compress]\n")); printf(_(" [--compress-algorithm=compress-algorithm]\n")); printf(_(" [--compress-level=compress-level]\n")); + printf(_(" [--archive-timeout=archive-timeout]\n")); printf(_(" [-d dbname] [-h host] [-p port] [-U username]\n")); printf(_(" [-w --no-password] [-W --password]\n")); - printf(_(" [--master-db=db_name] [--master-host=host_name]\n")); - printf(_(" [--master-port=port] [--master-user=user_name]\n")); - printf(_(" [--replica-timeout=timeout]\n\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--ttl=interval] [--expire-time=timestamp] [--note=text]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" -b, --backup-mode=backup-mode backup mode=FULL|PAGE|DELTA|PTRACK\n")); printf(_(" --instance=instance_name name of the instance\n")); + printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); printf(_(" -C, --smooth-checkpoint do smooth checkpoint before backup\n")); printf(_(" --stream stream the transaction log and include it in the backup\n")); printf(_(" -S, --slot=SLOTNAME replication slot to use\n")); - printf(_(" --backup-pg-log backup of pg_log directory\n")); + printf(_(" --temp-slot use temporary replication slot\n")); + printf(_(" --backup-pg-log backup of '%s' directory\n"), PG_LOG_DIR); printf(_(" -j, --threads=NUM number of parallel threads\n")); - printf(_(" --archive-timeout=timeout wait timeout for WAL segment archiving (default: 5min)\n")); printf(_(" --progress show progress\n")); + printf(_(" --no-validate disable validation after backup\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); + printf(_(" -E --external-dirs=external-directories-paths\n")); + printf(_(" backup some directories not from pgdata \n")); + printf(_(" (example: --external-dirs=/tmp/dir1:/tmp/dir2)\n")); + printf(_(" --no-sync do not sync backed up files to disk\n")); + printf(_(" --note=text add note to backup\n")); + printf(_(" (example: --note='backup before app update to v13.1')\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -222,86 +318,156 @@ help_backup(void) printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); printf(_(" --log-filename=log-filename\n")); printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); - printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); printf(_(" --error-log-filename=error-log-filename\n")); printf(_(" filename for error logging (default: none)\n")); printf(_(" --log-directory=log-directory\n")); printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); printf(_(" 
--log-rotation-size=log-rotation-size\n")); - printf(_(" rotate logfile if its size exceed this value; 0 disables; (default: 0)\n")); - printf(_(" available units: 'KB', 'MB', 'GB', 'TB' (default: KB)\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); printf(_(" --log-rotation-age=log-rotation-age\n")); - printf(_(" rotate logfile if its age exceed this value; 0 disables; (default: 0)\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n")); printf(_("\n Retention options:\n")); printf(_(" --delete-expired delete backups expired according to current\n")); printf(_(" retention policy after successful backup completion\n")); - printf(_(" --delete-wal remove redundant archived wal files\n")); + printf(_(" --merge-expired merge backups expired according to current\n")); + printf(_(" retention policy after successful backup completion\n")); + printf(_(" --delete-wal remove redundant files in WAL archive\n")); printf(_(" --retention-redundancy=retention-redundancy\n")); printf(_(" number of full backups to keep; 0 disables; (default: 0)\n")); printf(_(" --retention-window=retention-window\n")); printf(_(" number of days of recoverability; 0 disables; (default: 0)\n")); + printf(_(" --wal-depth=wal-depth number of latest valid backups per timeline that must\n")); + printf(_(" retain the ability to perform PITR; 0 disables; (default: 0)\n")); + printf(_(" --dry-run perform a trial run without any changes\n")); + + printf(_("\n Pinning options:\n")); + printf(_(" --ttl=interval pin backup for specified amount of time; 0 unpin\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: s)\n")); + printf(_(" (example: --ttl=20d)\n")); + printf(_(" --expire-time=time pin backup until specified time stamp\n")); + printf(_(" (example: --expire-time='2024-01-01 00:00:00+03')\n")); printf(_("\n Compression options:\n")); - printf(_(" --compress compress data files\n")); + printf(_(" --compress alias for --compress-algorithm='zlib' and --compress-level=1\n")); printf(_(" --compress-algorithm=compress-algorithm\n")); - printf(_(" available options: 'zlib', 'pglz', 'none' (default: zlib)\n")); + printf(_(" available options: 'zlib', 'pglz', 'none' (default: none)\n")); printf(_(" --compress-level=compress-level\n")); - printf(_(" level of compression [0-9] (default: 6)\n")); + printf(_(" level of compression [0-9] (default: 1)\n")); + + printf(_("\n Archive options:\n")); + printf(_(" --archive-timeout=timeout wait timeout for WAL segment archiving (default: 5min)\n")); printf(_("\n Connection options:\n")); - printf(_(" -U, --username=USERNAME user name to connect as (default: current local user)\n")); - printf(_(" -d, --dbname=DBNAME database to connect (default: username)\n")); - printf(_(" -h, --host=HOSTNAME database server host or socket directory(default: 'local socket')\n")); - printf(_(" -p, --port=PORT database server port (default: 5432)\n")); + printf(_(" -U, --pguser=USERNAME user name to connect as (default: current local user)\n")); + printf(_(" -d, --pgdatabase=DBNAME database to connect (default: username)\n")); + printf(_(" -h, --pghost=HOSTNAME database server host or socket directory(default: 'local socket')\n")); + printf(_(" -p, --pgport=PORT database server port (default: 5432)\n")); printf(_(" -w, --no-password never prompt for password\n")); 
printf(_(" -W, --password force password prompt\n")); + printf(_("\n Remote options:\n")); + printf(_(" --remote-proto=protocol remote protocol to use\n")); + printf(_(" available options: 'ssh', 'none' (default: ssh)\n")); + printf(_(" --remote-host=destination remote host address or hostname\n")); + printf(_(" --remote-port=port remote host port (default: 22)\n")); + printf(_(" --remote-path=path path to directory with pg_probackup binary on remote host\n")); + printf(_(" (default: current binary path)\n")); + printf(_(" --remote-user=username user name for ssh connection (default: current user)\n")); + printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n")); + printf(_("\n Replica options:\n")); - printf(_(" --master-user=user_name user name to connect to master\n")); - printf(_(" --master-db=db_name database to connect to master\n")); - printf(_(" --master-host=host_name database server host of master\n")); - printf(_(" --master-port=port database server port of master\n")); - printf(_(" --replica-timeout=timeout wait timeout for WAL segment streaming through replication (default: 5min)\n")); + printf(_(" --master-user=user_name user name to connect to master (deprecated)\n")); + printf(_(" --master-db=db_name database to connect to master (deprecated)\n")); + printf(_(" --master-host=host_name database server host of master (deprecated)\n")); + printf(_(" --master-port=port database server port of master (deprecated)\n")); + printf(_(" --replica-timeout=timeout wait timeout for WAL segment streaming through replication (deprecated)\n\n")); } static void help_restore(void) { - printf(_("%s restore -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" [-D pgdata-dir] [-i backup-id] [--progress]\n")); - printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n")); - printf(_(" [--timeline=timeline] [-T OLDDIR=NEWDIR]\n")); - printf(_(" [--immediate] [--recovery-target-name=target-name]\n")); + printf(_("\n%s restore -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path] [-i backup-id] [-j num-threads]\n")); + printf(_(" [--progress] [--force] [--no-sync]\n")); + printf(_(" [--no-validate] [--skip-block-validation]\n")); + printf(_(" [-T OLDDIR=NEWDIR]\n")); + printf(_(" [--external-mapping=OLDDIR=NEWDIR]\n")); + printf(_(" [--skip-external-dirs]\n")); + printf(_(" [-I | --incremental-mode=none|checksum|lsn]\n")); + printf(_(" [--db-include dbname | --db-exclude dbname]\n")); + printf(_(" [--recovery-target-time=time|--recovery-target-xid=xid\n")); + printf(_(" |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]]\n")); + printf(_(" [--recovery-target-timeline=timeline]\n")); + printf(_(" [--recovery-target=immediate|latest]\n")); + printf(_(" [--recovery-target-name=target-name]\n")); printf(_(" [--recovery-target-action=pause|promote|shutdown]\n")); - printf(_(" [--restore-as-replica] [--no-validate]\n\n")); + printf(_(" [--restore-command=cmdline]\n")); + printf(_(" [-R | --restore-as-replica]\n")); + printf(_(" [--primary-conninfo=primary_conninfo]\n")); + printf(_(" [-S | --primary-slot-name=slotname]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [--archive-host=hostname] [--archive-port=port]\n")); + printf(_(" [--archive-user=username]\n\n")); printf(_(" -B, --backup-path=backup-path 
location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); - printf(_(" -D, --pgdata=pgdata-dir location of the database storage area\n")); + printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); printf(_(" -i, --backup-id=backup-id backup to restore\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); printf(_(" --progress show progress\n")); - printf(_(" --time=time time stamp up to which recovery will proceed\n")); - printf(_(" --xid=xid transaction ID up to which recovery will proceed\n")); - printf(_(" --lsn=lsn LSN of the write-ahead log location up to which recovery will proceed\n")); - printf(_(" --inclusive=boolean whether we stop just after the recovery target\n")); - printf(_(" --timeline=timeline recovering into a particular timeline\n")); + printf(_(" --force ignore invalid status of the restored backup\n")); + printf(_(" --no-sync do not sync restored files to disk\n")); + printf(_(" --no-validate disable backup validation during restore\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); + printf(_(" -T, --tablespace-mapping=OLDDIR=NEWDIR\n")); printf(_(" relocate the tablespace from directory OLDDIR to NEWDIR\n")); - - printf(_(" --immediate end recovery as soon as a consistent state is reached\n")); + printf(_(" --external-mapping=OLDDIR=NEWDIR\n")); + printf(_(" relocate the external directory from OLDDIR to NEWDIR\n")); + printf(_(" --skip-external-dirs do not restore all external directories\n")); + + printf(_("\n Incremental restore options:\n")); + printf(_(" -I, --incremental-mode=none|checksum|lsn\n")); + printf(_(" reuse valid pages available in PGDATA if they have not changed\n")); + printf(_(" (default: none)\n")); + + printf(_("\n Partial restore options:\n")); + printf(_(" --db-include dbname restore only specified databases\n")); + printf(_(" --db-exclude dbname do not restore specified databases\n")); + + printf(_("\n Recovery options:\n")); + printf(_(" --recovery-target-time=time time stamp up to which recovery will proceed\n")); + printf(_(" --recovery-target-xid=xid transaction ID up to which recovery will proceed\n")); + printf(_(" --recovery-target-lsn=lsn LSN of the write-ahead log location up to which recovery will proceed\n")); + printf(_(" --recovery-target-inclusive=boolean\n")); + printf(_(" whether we stop just after the recovery target\n")); + printf(_(" --recovery-target-timeline=timeline\n")); + printf(_(" recovering into a particular timeline\n")); + printf(_(" --recovery-target=immediate|latest\n")); + printf(_(" end recovery as soon as a consistent state is reached or as late as possible\n")); printf(_(" --recovery-target-name=target-name\n")); printf(_(" the named restore point to which recovery will proceed\n")); printf(_(" --recovery-target-action=pause|promote|shutdown\n")); printf(_(" action the server should take once the recovery target is reached\n")); printf(_(" (default: pause)\n")); + printf(_(" --restore-command=cmdline command to use as 'restore_command' in recovery.conf; 'none' disables\n")); + printf(_("\n Standby options:\n")); printf(_(" -R, --restore-as-replica write a minimal recovery.conf in the output directory\n")); printf(_(" to ease setting up a standby server\n")); - printf(_(" --no-validate disable backup validation during restore\n")); + printf(_(" --primary-conninfo=primary_conninfo\n")); + printf(_(" connection string to be used for establishing connection\n")); + printf(_(" with the 
primary server\n")); + printf(_(" -S, --primary-slot-name=slotname replication slot to be used for WAL streaming from the primary server\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -312,39 +478,106 @@ help_restore(void) printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); printf(_(" --log-filename=log-filename\n")); printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); - printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); printf(_(" --error-log-filename=error-log-filename\n")); printf(_(" filename for error logging (default: none)\n")); printf(_(" --log-directory=log-directory\n")); printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); printf(_(" --log-rotation-size=log-rotation-size\n")); - printf(_(" rotate logfile if its size exceed this value; 0 disables; (default: 0)\n")); - printf(_(" available units: 'KB', 'MB', 'GB', 'TB' (default: KB)\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); printf(_(" --log-rotation-age=log-rotation-age\n")); - printf(_(" rotate logfile if its age exceed this value; 0 disables; (default: 0)\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n")); + + printf(_("\n Remote options:\n")); + printf(_(" --remote-proto=protocol remote protocol to use\n")); + printf(_(" available options: 'ssh', 'none' (default: ssh)\n")); + printf(_(" --remote-host=destination remote host address or hostname\n")); + printf(_(" --remote-port=port remote host port (default: 22)\n")); + printf(_(" --remote-path=path path to directory with pg_probackup binary on remote host\n")); + printf(_(" (default: current binary path)\n")); + printf(_(" --remote-user=username user name for ssh connection (default: current user)\n")); + printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n")); + + printf(_("\n Remote WAL archive options:\n")); + printf(_(" --archive-host=destination address or hostname for ssh connection to archive host\n")); + printf(_(" --archive-port=port port for ssh connection to archive host (default: 22)\n")); + printf(_(" --archive-user=username user name for ssh connection to archive host (default: PostgreSQL user)\n\n")); } static void help_validate(void) { - printf(_("%s validate -B backup-dir [--instance=instance_name]\n"), PROGRAM_NAME); - printf(_(" [-i backup-id] [--progress]\n")); - printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n")); - printf(_(" [--timeline=timeline]\n\n")); + printf(_("\n%s validate -B backup-path [--instance=instance_name]\n"), PROGRAM_NAME); + printf(_(" [-i backup-id] [--progress] [-j num-threads]\n")); + printf(_(" [--recovery-target-time=time|--recovery-target-xid=xid\n")); + printf(_(" |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]]\n")); + printf(_(" [--recovery-target-timeline=timeline]\n")); + printf(_(" [--recovery-target-name=target-name]\n")); + printf(_(" [--skip-block-validation]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name 
of the instance\n")); printf(_(" -i, --backup-id=backup-id backup to validate\n")); printf(_(" --progress show progress\n")); - printf(_(" --time=time time stamp up to which recovery will proceed\n")); - printf(_(" --xid=xid transaction ID up to which recovery will proceed\n")); - printf(_(" --lsn=lsn LSN of the write-ahead log location up to which recovery will proceed\n")); - printf(_(" --inclusive=boolean whether we stop just after the recovery target\n")); - printf(_(" --timeline=timeline recovering into a particular timeline\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); + printf(_(" --recovery-target-time=time time stamp up to which recovery will proceed\n")); + printf(_(" --recovery-target-xid=xid transaction ID up to which recovery will proceed\n")); + printf(_(" --recovery-target-lsn=lsn LSN of the write-ahead log location up to which recovery will proceed\n")); + printf(_(" --recovery-target-inclusive=boolean\n")); + printf(_(" whether we stop just after the recovery target\n")); + printf(_(" --recovery-target-timeline=timeline\n")); + printf(_(" recovering into a particular timeline\n")); printf(_(" --recovery-target-name=target-name\n")); printf(_(" the named restore point to which recovery will proceed\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); + + printf(_("\n Logging options:\n")); + printf(_(" --log-level-console=log-level-console\n")); + printf(_(" level for console logging (default: info)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-level-file=log-level-file\n")); + printf(_(" level for file logging (default: off)\n")); + printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); + printf(_(" --log-filename=log-filename\n")); + printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); + printf(_(" --error-log-filename=error-log-filename\n")); + printf(_(" filename for error logging (default: none)\n")); + printf(_(" --log-directory=log-directory\n")); + printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); + printf(_(" --log-rotation-size=log-rotation-size\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); + printf(_(" --log-rotation-age=log-rotation-age\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n\n")); +} + +static void +help_checkdb(void) +{ + printf(_("\n%s checkdb [-B backup-path] [--instance=instance_name]\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path] [-j num-threads] [--progress]\n")); + printf(_(" [--amcheck] [--skip-block-validation]\n")); + printf(_(" [--heapallindexed]\n\n")); + + printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); + printf(_(" --instance=instance_name name of the instance\n")); + printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); + + printf(_(" --progress show progress\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); + printf(_(" --skip-block-validation skip file-level checking\n")); + printf(_(" can be used only with '--amcheck' option\n")); + printf(_(" --amcheck in addition to file-level block checking\n")); + printf(_(" check 
btree indexes via function 'bt_index_check()'\n")); + printf(_(" using 'amcheck' or 'amcheck_next' extensions\n")); + printf(_(" --heapallindexed also check that heap is indexed\n")); + printf(_(" can be used only with '--amcheck' option\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -361,38 +594,65 @@ help_validate(void) printf(_(" --log-directory=log-directory\n")); printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); printf(_(" --log-rotation-size=log-rotation-size\n")); - printf(_(" rotate logfile if its size exceed this value; 0 disables; (default: 0)\n")); - printf(_(" available units: 'KB', 'MB', 'GB', 'TB' (default: KB)\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); printf(_(" --log-rotation-age=log-rotation-age\n")); - printf(_(" rotate logfile if its age exceed this value; 0 disables; (default: 0)\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n")); + + printf(_("\n Connection options:\n")); + printf(_(" -U, --pguser=USERNAME user name to connect as (default: current local user)\n")); + printf(_(" -d, --pgdatabase=DBNAME database to connect (default: username)\n")); + printf(_(" -h, --pghost=HOSTNAME database server host or socket directory(default: 'local socket')\n")); + printf(_(" -p, --pgport=PORT database server port (default: 5432)\n")); + printf(_(" -w, --no-password never prompt for password\n")); + printf(_(" -W, --password force password prompt\n\n")); } static void help_show(void) { - printf(_("%s show -B backup-dir\n"), PROGRAM_NAME); + printf(_("\n%s show -B backup-path\n"), PROGRAM_NAME); printf(_(" [--instance=instance_name [-i backup-id]]\n")); - printf(_(" [--format=format]\n\n")); + printf(_(" [--format=format] [--archive]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); - printf(_(" --instance=instance_name show info about specific intstance\n")); + printf(_(" --instance=instance_name show info about specific instance\n")); printf(_(" -i, --backup-id=backup-id show info about specific backups\n")); - printf(_(" --format=format show format=PLAIN|JSON\n")); + printf(_(" --archive show WAL archive information\n")); + printf(_(" --format=format show format=PLAIN|JSON\n\n")); } static void help_delete(void) { - printf(_("%s delete -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" [-i backup-id | --expired] [--wal]\n\n")); + printf(_("\n%s delete -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-i backup-id | --delete-expired | --merge-expired] [--delete-wal]\n")); + printf(_(" [-j num-threads] [--progress]\n")); + printf(_(" [--retention-redundancy=retention-redundancy]\n")); + printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); printf(_(" -i, --backup-id=backup-id backup to delete\n")); - printf(_(" --expired delete backups expired according to current\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); + printf(_(" --progress show progress\n")); + + printf(_("\n Retention options:\n")); + printf(_(" --delete-expired delete backups expired according to 
current\n")); printf(_(" retention policy\n")); - printf(_(" --wal remove unnecessary wal files in WAL ARCHIVE\n")); + printf(_(" --merge-expired merge backups expired according to current\n")); + printf(_(" retention policy\n")); + printf(_(" --delete-wal remove redundant files in WAL archive\n")); + printf(_(" --retention-redundancy=retention-redundancy\n")); + printf(_(" number of full backups to keep; 0 disables; (default: 0)\n")); + printf(_(" --retention-window=retention-window\n")); + printf(_(" number of days of recoverability; 0 disables; (default: 0)\n")); + printf(_(" --wal-depth=wal-depth number of latest valid backups per timeline that must\n")); + printf(_(" retain the ability to perform PITR; 0 disables; (default: 0)\n")); + printf(_(" --dry-run perform a trial run without any changes\n")); + printf(_(" --status=backup_status delete all backups with specified status\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -403,23 +663,23 @@ help_delete(void) printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); printf(_(" --log-filename=log-filename\n")); printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); - printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); printf(_(" --error-log-filename=error-log-filename\n")); printf(_(" filename for error logging (default: none)\n")); printf(_(" --log-directory=log-directory\n")); printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); printf(_(" --log-rotation-size=log-rotation-size\n")); - printf(_(" rotate logfile if its size exceed this value; 0 disables; (default: 0)\n")); - printf(_(" available units: 'KB', 'MB', 'GB', 'TB' (default: KB)\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); printf(_(" --log-rotation-age=log-rotation-age\n")); - printf(_(" rotate logfile if its age exceed this value; 0 disables; (default: 0)\n")); - printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n\n")); } static void help_merge(void) { - printf(_("%s merge -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); + printf(_("\n%s merge -B backup-path --instance=instance_name\n"), PROGRAM_NAME); printf(_(" -i backup-id [-j num-threads] [--progress]\n")); printf(_(" [--log-level-console=log-level-console]\n")); printf(_(" [--log-level-file=log-level-file]\n")); @@ -445,23 +705,42 @@ help_merge(void) printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); printf(_(" --log-filename=log-filename\n")); printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); - printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); printf(_(" --error-log-filename=error-log-filename\n")); printf(_(" filename for error logging (default: none)\n")); printf(_(" --log-directory=log-directory\n")); printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); printf(_(" --log-rotation-size=log-rotation-size\n")); - printf(_(" rotate logfile if its size 
exceed this value; 0 disables; (default: 0)\n")); - printf(_(" available units: 'KB', 'MB', 'GB', 'TB' (default: KB)\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); printf(_(" --log-rotation-age=log-rotation-age\n")); - printf(_(" rotate logfile if its age exceed this value; 0 disables; (default: 0)\n")); - printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n\n")); +} + +static void +help_set_backup(void) +{ + printf(_("\n%s set-backup -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" -i backup-id\n")); + printf(_(" [--ttl=interval] [--expire-time=time] [--note=text]\n\n")); + + printf(_(" --ttl=interval pin backup for specified amount of time; 0 unpin\n")); + printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: s)\n")); + printf(_(" (example: --ttl=20d)\n")); + printf(_(" --expire-time=time pin backup until specified time stamp\n")); + printf(_(" (example: --expire-time='2024-01-01 00:00:00+03')\n")); + printf(_(" --note=text add note to backup; 'none' to remove note\n")); + printf(_(" (example: --note='backup before app update to v13.1')\n")); } static void help_set_config(void) { - printf(_("%s set-config -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); + printf(_("\n%s set-config -B backup-path --instance=instance_name\n"), PROGRAM_NAME); + printf(_(" [-D pgdata-path]\n")); + printf(_(" [-E external-directories-paths]\n")); + printf(_(" [--restore-command=cmdline]\n")); printf(_(" [--log-level-console=log-level-console]\n")); printf(_(" [--log-level-file=log-level-file]\n")); printf(_(" [--log-filename=log-filename]\n")); @@ -471,16 +750,22 @@ help_set_config(void) printf(_(" [--log-rotation-age=log-rotation-age]\n")); printf(_(" [--retention-redundancy=retention-redundancy]\n")); printf(_(" [--retention-window=retention-window]\n")); + printf(_(" [--wal-depth=wal-depth]\n")); printf(_(" [--compress-algorithm=compress-algorithm]\n")); printf(_(" [--compress-level=compress-level]\n")); + printf(_(" [--archive-timeout=timeout]\n")); printf(_(" [-d dbname] [-h host] [-p port] [-U username]\n")); - printf(_(" [--master-db=db_name] [--master-host=host_name]\n")); - printf(_(" [--master-port=port] [--master-user=user_name]\n")); - printf(_(" [--replica-timeout=timeout]\n\n")); - printf(_(" [--archive-timeout=timeout]\n\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); + printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); + printf(_(" -E --external-dirs=external-directories-paths\n")); + printf(_(" backup some directories not from pgdata \n")); + printf(_(" (example: --external-dirs=/tmp/dir1:/tmp/dir2)\n")); + printf(_(" --restore-command=cmdline command to use as 'restore_command' in recovery.conf; 'none' disables\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -491,16 +776,16 @@ help_set_config(void) printf(_(" available options: 'off', 'error', 'warning', 'info', 'log', 'verbose'\n")); printf(_(" 
--log-filename=log-filename\n")); printf(_(" filename for file logging (default: 'pg_probackup.log')\n")); - printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log\n")); + printf(_(" support strftime format (example: pg_probackup-%%Y-%%m-%%d_%%H%%M%%S.log)\n")); printf(_(" --error-log-filename=error-log-filename\n")); printf(_(" filename for error logging (default: none)\n")); printf(_(" --log-directory=log-directory\n")); printf(_(" directory for file logging (default: BACKUP_PATH/log)\n")); printf(_(" --log-rotation-size=log-rotation-size\n")); - printf(_(" rotate logfile if its size exceed this value; 0 disables; (default: 0)\n")); - printf(_(" available units: 'KB', 'MB', 'GB', 'TB' (default: KB)\n")); + printf(_(" rotate logfile if its size exceeds this value; 0 disables; (default: 0)\n")); + printf(_(" available units: 'kB', 'MB', 'GB', 'TB' (default: kB)\n")); printf(_(" --log-rotation-age=log-rotation-age\n")); - printf(_(" rotate logfile if its age exceed this value; 0 disables; (default: 0)\n")); + printf(_(" rotate logfile if its age exceeds this value; 0 disables; (default: 0)\n")); printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: min)\n")); printf(_("\n Retention options:\n")); @@ -508,87 +793,154 @@ help_set_config(void) printf(_(" number of full backups to keep; 0 disables; (default: 0)\n")); printf(_(" --retention-window=retention-window\n")); printf(_(" number of days of recoverability; 0 disables; (default: 0)\n")); + printf(_(" --wal-depth=wal-depth number of latest valid backups with ability to perform\n")); + printf(_(" the point in time recovery; disables; (default: 0)\n")); printf(_("\n Compression options:\n")); + printf(_(" --compress alias for --compress-algorithm='zlib' and --compress-level=1\n")); printf(_(" --compress-algorithm=compress-algorithm\n")); - printf(_(" available options: 'zlib','pglz','none'\n")); + printf(_(" available options: 'zlib','pglz','none' (default: 'none')\n")); printf(_(" --compress-level=compress-level\n")); - printf(_(" level of compression [0-9] (default: 6)\n")); + printf(_(" level of compression [0-9] (default: 1)\n")); + + printf(_("\n Archive options:\n")); + printf(_(" --archive-timeout=timeout wait timeout for WAL segment archiving (default: 5min)\n")); printf(_("\n Connection options:\n")); - printf(_(" -U, --username=USERNAME user name to connect as (default: current local user)\n")); - printf(_(" -d, --dbname=DBNAME database to connect (default: username)\n")); - printf(_(" -h, --host=HOSTNAME database server host or socket directory(default: 'local socket')\n")); - printf(_(" -p, --port=PORT database server port (default: 5432)\n")); + printf(_(" -U, --pguser=USERNAME user name to connect as (default: current local user)\n")); + printf(_(" -d, --pgdatabase=DBNAME database to connect (default: username)\n")); + printf(_(" -h, --pghost=HOSTNAME database server host or socket directory(default: 'local socket')\n")); + printf(_(" -p, --pgport=PORT database server port (default: 5432)\n")); + + printf(_("\n Remote options:\n")); + printf(_(" --remote-proto=protocol remote protocol to use\n")); + printf(_(" available options: 'ssh', 'none' (default: ssh)\n")); + printf(_(" --remote-host=destination remote host address or hostname\n")); + printf(_(" --remote-port=port remote host port (default: 22)\n")); + printf(_(" --remote-path=path path to directory with pg_probackup binary on remote host\n")); + printf(_(" (default: current binary path)\n")); + printf(_(" --remote-user=username 
user name for ssh connection (default: current user)\n")); + printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n")); + + printf(_("\n Remote WAL archive options:\n")); + printf(_(" --archive-host=destination address or hostname for ssh connection to archive host\n")); + printf(_(" --archive-port=port port for ssh connection to archive host (default: 22)\n")); + printf(_(" --archive-user=username user name for ssh connection to archive host (default: PostgreSQL user)\n")); printf(_("\n Replica options:\n")); - printf(_(" --master-user=user_name user name to connect to master\n")); - printf(_(" --master-db=db_name database to connect to master\n")); - printf(_(" --master-host=host_name database server host of master\n")); - printf(_(" --master-port=port database server port of master\n")); - printf(_(" --replica-timeout=timeout wait timeout for WAL segment streaming through replication (default: 5min)\n")); - printf(_("\n Archive options:\n")); - printf(_(" --archive-timeout=timeout wait timeout for WAL segment archiving (default: 5min)\n")); + printf(_(" --master-user=user_name user name to connect to master (deprecated)\n")); + printf(_(" --master-db=db_name database to connect to master (deprecated)\n")); + printf(_(" --master-host=host_name database server host of master (deprecated)\n")); + printf(_(" --master-port=port database server port of master (deprecated)\n")); + printf(_(" --replica-timeout=timeout wait timeout for WAL segment streaming through replication (deprecated)\n\n")); } static void help_show_config(void) { - printf(_("%s show-config -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); + printf(_("\n%s show-config -B backup-path --instance=instance_name\n"), PROGRAM_NAME); printf(_(" [--format=format]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); - printf(_(" --format=format show format=PLAIN|JSON\n")); + printf(_(" --format=format show format=PLAIN|JSON\n\n")); } static void help_add_instance(void) { - printf(_("%s add-instance -B backup-dir -D pgdata-dir\n"), PROGRAM_NAME); - printf(_(" --instance=instance_name\n\n")); + printf(_("\n%s add-instance -B backup-path -D pgdata-path\n"), PROGRAM_NAME); + printf(_(" --instance=instance_name\n")); + printf(_(" [-E external-directory-path]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); - printf(_(" -D, --pgdata=pgdata-dir location of the database storage area\n")); + printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); printf(_(" --instance=instance_name name of the new instance\n")); + + printf(_(" -E --external-dirs=external-directories-paths\n")); + printf(_(" backup some directories not from pgdata \n")); + printf(_(" (example: --external-dirs=/tmp/dir1:/tmp/dir2)\n")); + printf(_("\n Remote options:\n")); + printf(_(" --remote-proto=protocol remote protocol to use\n")); + printf(_(" available options: 'ssh', 'none' (default: ssh)\n")); + printf(_(" --remote-host=destination remote host address or hostname\n")); + printf(_(" --remote-port=port remote host port (default: 22)\n")); + printf(_(" --remote-path=path path to directory with pg_probackup binary on remote host\n")); + printf(_(" 
(default: current binary path)\n")); + printf(_(" --remote-user=username user name for ssh connection (default: current user)\n")); + printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n\n")); } static void help_del_instance(void) { - printf(_("%s del-instance -B backup-dir --instance=instance_name\n\n"), PROGRAM_NAME); + printf(_("\n%s del-instance -B backup-path --instance=instance_name\n"), PROGRAM_NAME); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); - printf(_(" --instance=instance_name name of the instance to delete\n")); + printf(_(" --instance=instance_name name of the instance to delete\n\n")); } static void help_archive_push(void) { - printf(_("\n %s archive-push -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" --wal-file-path=wal-file-path\n")); + printf(_("\n%s archive-push -B backup-path --instance=instance_name\n"), PROGRAM_NAME); printf(_(" --wal-file-name=wal-file-name\n")); - printf(_(" [--compress [--compress-level=compress-level]]\n")); - printf(_(" [--overwrite]\n\n")); + printf(_(" [-j num-threads] [--batch-size=batch_size]\n")); + printf(_(" [--archive-timeout=timeout]\n")); + printf(_(" [--no-ready-rename] [--no-sync]\n")); + printf(_(" [--overwrite] [--compress]\n")); + printf(_(" [--compress-algorithm=compress-algorithm]\n")); + printf(_(" [--compress-level=compress-level]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance to delete\n")); - printf(_(" --wal-file-path=wal-file-path\n")); - printf(_(" relative path name of the WAL file on the server\n")); printf(_(" --wal-file-name=wal-file-name\n")); - printf(_(" name of the WAL file to retrieve from the server\n")); - printf(_(" --compress compress WAL file during archiving\n")); - printf(_(" --compress-level=compress-level\n")); - printf(_(" level of compression [0-9]\n")); + printf(_(" name of the file to copy into WAL archive\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); + printf(_(" --batch-size=NUM number of files to be copied\n")); + printf(_(" --archive-timeout=timeout wait timeout before discarding stale temp file(default: 5min)\n")); + printf(_(" --no-ready-rename do not rename '.ready' files in 'archive_status' directory\n")); + printf(_(" --no-sync do not sync WAL file to disk\n")); printf(_(" --overwrite overwrite archived WAL file\n")); + + printf(_("\n Compression options:\n")); + printf(_(" --compress alias for --compress-algorithm='zlib' and --compress-level=1\n")); + printf(_(" --compress-algorithm=compress-algorithm\n")); + printf(_(" available options: 'zlib','pglz','none' (default: 'none')\n")); + printf(_(" --compress-level=compress-level\n")); + printf(_(" level of compression [0-9] (default: 1)\n")); + + printf(_("\n Remote options:\n")); + printf(_(" --remote-proto=protocol remote protocol to use\n")); + printf(_(" available options: 'ssh', 'none' (default: ssh)\n")); + printf(_(" --remote-host=hostname remote host address or hostname\n")); + printf(_(" --remote-port=port remote host port (default: 22)\n")); + printf(_(" --remote-path=path path to directory with pg_probackup binary on remote host\n")); + printf(_(" (default: current binary path)\n")); + printf(_(" 
--remote-user=username user name for ssh connection (default: current user)\n")); + printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n\n")); } static void help_archive_get(void) { - printf(_("\n %s archive-get -B backup-dir --instance=instance_name\n"), PROGRAM_NAME); + printf(_("\n%s archive-get -B backup-path --instance=instance_name\n"), PROGRAM_NAME); printf(_(" --wal-file-path=wal-file-path\n")); - printf(_(" --wal-file-name=wal-file-name\n\n")); + printf(_(" --wal-file-name=wal-file-name\n")); + printf(_(" [-j num-threads] [--batch-size=batch_size]\n")); + printf(_(" [--no-validate-wal]\n")); + printf(_(" [--remote-proto] [--remote-host]\n")); + printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); + printf(_(" [--ssh-options]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance to delete\n")); @@ -596,4 +948,19 @@ help_archive_get(void) printf(_(" relative destination path name of the WAL file on the server\n")); printf(_(" --wal-file-name=wal-file-name\n")); printf(_(" name of the WAL file to retrieve from the archive\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); + printf(_(" --batch-size=NUM number of files to be prefetched\n")); + printf(_(" --prefetch-dir=path location of the store area for prefetched WAL files\n")); + printf(_(" --no-validate-wal skip validation of prefetched WAL file before using it\n")); + + printf(_("\n Remote options:\n")); + printf(_(" --remote-proto=protocol remote protocol to use\n")); + printf(_(" available options: 'ssh', 'none' (default: ssh)\n")); + printf(_(" --remote-host=hostname remote host address or hostname\n")); + printf(_(" --remote-port=port remote host port (default: 22)\n")); + printf(_(" --remote-path=path path to directory with pg_probackup binary on remote host\n")); + printf(_(" (default: current binary path)\n")); + printf(_(" --remote-user=username user name for ssh connection (default: current user)\n")); + printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n\n")); } diff --git a/src/init.c b/src/init.c index 712cba11c..431ea3b70 100644 --- a/src/init.c +++ b/src/init.c @@ -3,7 +3,7 @@ * init.c: - initialize backup catalog. 
* * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ @@ -11,7 +11,6 @@ #include "pg_probackup.h" #include -#include #include /* @@ -22,7 +21,7 @@ do_init(void) { char path[MAXPGPATH]; char arclog_path_dir[MAXPGPATH]; - int results; + int results; results = pg_check_dir(backup_path); if (results == 4) /* exists and not empty*/ @@ -50,55 +49,82 @@ do_init(void) } int -do_add_instance(void) +do_add_instance(InstanceConfig *instance) { char path[MAXPGPATH]; char arclog_path_dir[MAXPGPATH]; - struct stat st; - pgBackupConfig *config = pgut_new(pgBackupConfig); + struct stat st; /* PGDATA is always required */ - if (pgdata == NULL) + if (instance->pgdata == NULL) elog(ERROR, "Required parameter not specified: PGDATA " "(-D, --pgdata)"); /* Read system_identifier from PGDATA */ - system_identifier = get_system_identifier(pgdata); + instance->system_identifier = get_system_identifier(instance->pgdata); + /* Starting from PostgreSQL 11 read WAL segment size from PGDATA */ + instance->xlog_seg_size = get_xlog_seg_size(instance->pgdata); /* Ensure that all root directories already exist */ if (access(backup_path, F_OK) != 0) - elog(ERROR, "%s directory does not exist.", backup_path); + elog(ERROR, "Directory does not exist: '%s'", backup_path); join_path_components(path, backup_path, BACKUPS_DIR); if (access(path, F_OK) != 0) - elog(ERROR, "%s directory does not exist.", path); + elog(ERROR, "Directory does not exist: '%s'", path); join_path_components(arclog_path_dir, backup_path, "wal"); if (access(arclog_path_dir, F_OK) != 0) - elog(ERROR, "%s directory does not exist.", arclog_path_dir); + elog(ERROR, "Directory does not exist: '%s'", arclog_path_dir); - /* Create directory for data files of this specific instance */ - if (stat(backup_instance_path, &st) == 0 && S_ISDIR(st.st_mode)) - elog(ERROR, "instance '%s' already exists", backup_instance_path); - dir_create_dir(backup_instance_path, DIR_PERMISSION); + if (stat(instance->backup_instance_path, &st) == 0 && S_ISDIR(st.st_mode)) + elog(ERROR, "Instance '%s' backup directory already exists: '%s'", + instance->name, instance->backup_instance_path); /* * Create directory for wal files of this specific instance. * Existence check is extra paranoid because if we don't have such a * directory in data dir, we shouldn't have it in wal as well. */ - if (stat(arclog_path, &st) == 0 && S_ISDIR(st.st_mode)) - elog(ERROR, "arclog_path '%s' already exists", arclog_path); - dir_create_dir(arclog_path, DIR_PERMISSION); + if (stat(instance->arclog_path, &st) == 0 && S_ISDIR(st.st_mode)) + elog(ERROR, "Instance '%s' WAL archive directory already exists: '%s'", + instance->name, instance->arclog_path); + + /* Create directory for data files of this specific instance */ + dir_create_dir(instance->backup_instance_path, DIR_PERMISSION); + dir_create_dir(instance->arclog_path, DIR_PERMISSION); /* - * Wite initial config. system-identifier and pgdata are set in - * init subcommand and will never be updated. + * Write initial configuration file. + * system-identifier, xlog-seg-size and pgdata are set in init subcommand + * and will never be updated. + * + * We need to manually set options source to save them to the configuration + * file. 
*/ - pgBackupConfigInit(config); - config->system_identifier = system_identifier; - config->pgdata = pgdata; - writeBackupCatalogConfigFile(config); + config_set_opt(instance_options, &instance->system_identifier, + SOURCE_FILE); + config_set_opt(instance_options, &instance->xlog_seg_size, + SOURCE_FILE); + + /* Kludge: do not save remote options into config */ + config_set_opt(instance_options, &instance_config.remote.host, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.proto, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.port, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.path, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.user, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.ssh_options, + SOURCE_DEFAULT); + config_set_opt(instance_options, &instance_config.remote.ssh_config, + SOURCE_DEFAULT); + + /* pgdata was set through command line */ + do_set_config(true); elog(INFO, "Instance '%s' successfully inited", instance_name); return 0; diff --git a/src/merge.c b/src/merge.c index b149ef5b5..a453a073c 100644 --- a/src/merge.c +++ b/src/merge.c @@ -2,7 +2,7 @@ * * merge.c: merge FULL and incremental backups * - * Copyright (c) 2018, Postgres Professional + * Copyright (c) 2018-2019, Postgres Professional * *------------------------------------------------------------------------- */ @@ -16,14 +16,20 @@ typedef struct { - parray *to_files; - parray *files; + parray *merge_filelist; + parray *parent_chain; - pgBackup *to_backup; - pgBackup *from_backup; + pgBackup *dest_backup; + pgBackup *full_backup; - const char *to_root; - const char *from_root; + const char *full_database_dir; + const char *full_external_prefix; + +// size_t in_place_merge_bytes; + bool compression_match; + bool program_version_match; + bool use_bitmap; + bool is_retry; /* * Return value from the thread. @@ -32,8 +38,25 @@ typedef struct int ret; } merge_files_arg; -static void merge_backups(pgBackup *backup, pgBackup *next_backup); + static void *merge_files(void *arg); +static void +reorder_external_dirs(pgBackup *to_backup, parray *to_external, + parray *from_external); +static int +get_external_index(const char *key, const parray *list); + +static void +merge_data_file(parray *parent_chain, pgBackup *full_backup, + pgBackup *dest_backup, pgFile *dest_file, + pgFile *tmp_file, const char *to_root, bool use_bitmap, + bool is_retry); + +static void +merge_non_data_file(parray *parent_chain, pgBackup *full_backup, + pgBackup *dest_backup, pgFile *dest_file, + pgFile *tmp_file, const char *full_database_dir, + const char *full_external_prefix); /* * Implementation of MERGE command. 
@@ -46,12 +69,11 @@ void do_merge(time_t backup_id) { parray *backups; + parray *merge_list = parray_new(); pgBackup *dest_backup = NULL; + pgBackup *dest_backup_tmp = NULL; pgBackup *full_backup = NULL; - time_t prev_parent = INVALID_BACKUP_ID; int i; - int dest_backup_idx = 0; - int full_backup_idx = 0; if (backup_id == INVALID_BACKUP_ID) elog(ERROR, "required parameter is not specified: --backup-id"); @@ -59,174 +81,600 @@ do_merge(time_t backup_id) if (instance_name == NULL) elog(ERROR, "required parameter is not specified: --instance"); - elog(LOG, "Merge started"); - - catalog_lock(); + elog(INFO, "Merge started"); /* Get list of all backups sorted in order of descending start time */ - backups = catalog_get_backup_list(INVALID_BACKUP_ID); + backups = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); - /* Find destination and parent backups */ + /* Find destination backup first */ for (i = 0; i < parray_num(backups); i++) { pgBackup *backup = (pgBackup *) parray_get(backups, i); - if (backup->start_time > backup_id) - continue; - else if (backup->start_time == backup_id && !dest_backup) + /* found target */ + if (backup->start_time == backup_id) { - if (backup->status != BACKUP_STATUS_OK) + /* sanity */ + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE && + /* It is possible that previous merging was interrupted */ + backup->status != BACKUP_STATUS_MERGING && + backup->status != BACKUP_STATUS_MERGED && + backup->status != BACKUP_STATUS_DELETING) elog(ERROR, "Backup %s has status: %s", - base36enc(backup->start_time), status2str(backup->status)); - - if (backup->backup_mode == BACKUP_MODE_FULL) - elog(ERROR, "Backup %s if full backup", - base36enc(backup->start_time)); + base36enc(backup->start_time), status2str(backup->status)); dest_backup = backup; - dest_backup_idx = i; + break; } - else + } + + /* + * Handle the case of crash right after deletion of the target + * incremental backup. We still can recover from this. + * Iterate over backups and look for the FULL backup with + * MERGED status, that has merge-target-id eqial to backup_id. + */ + if (dest_backup == NULL) + { + for (i = 0; i < parray_num(backups); i++) { - Assert(dest_backup); + pgBackup *backup = (pgBackup *) parray_get(backups, i); - if (backup->start_time != prev_parent) - continue; + if (backup->status == BACKUP_STATUS_MERGED && + backup->merge_dest_backup == backup_id) + { + dest_backup = backup; + break; + } + } + } - if (backup->status != BACKUP_STATUS_OK) - elog(ERROR, "Skipping backup %s, because it has non-valid status: %s", - base36enc(backup->start_time), status2str(backup->status)); + if (dest_backup == NULL) + elog(ERROR, "Target backup %s was not found", base36enc(backup_id)); - /* If we already found dest_backup, look for full backup */ - if (dest_backup && backup->backup_mode == BACKUP_MODE_FULL) + /* It is possible to use FULL backup as target backup for merge. + * There are two possible cases: + * 1. The user want to merge FULL backup with closest incremental backup. + * In this case we must find suitable destination backup and merge them. + * + * 2. 
Previous merge has failed after destination backup was deleted, + * but before FULL backup was renamed: + * Example A: + * PAGE2_1 OK + * FULL2 OK + * PAGE1_1 MISSING/DELETING <- + * FULL1 MERGED/MERGING + */ + if (dest_backup->backup_mode == BACKUP_MODE_FULL) + { + full_backup = dest_backup; + dest_backup = NULL; + elog(INFO, "Merge target backup %s is full backup", + base36enc(full_backup->start_time)); + + /* sanity */ + if (full_backup->status == BACKUP_STATUS_DELETING) + elog(ERROR, "Backup %s has status: %s", + base36enc(full_backup->start_time), + status2str(full_backup->status)); + + /* Case #1 */ + if (full_backup->status == BACKUP_STATUS_OK || + full_backup->status == BACKUP_STATUS_DONE) + { + /* Check the case of FULL backup having more than one direct children */ + if (is_prolific(backups, full_backup)) + elog(ERROR, "Merge target is full backup and has multiple direct children, " + "you must specify child backup id you want to merge with"); + + elog(LOG, "Looking for closest incremental backup to merge with"); + + /* Look for closest child backup */ + for (i = 0; i < parray_num(backups); i++) { - if (backup->status != BACKUP_STATUS_OK) - elog(ERROR, "Parent full backup %s for the given backup %s has status: %s", - base36enc_dup(backup->start_time), - base36enc_dup(dest_backup->start_time), - status2str(backup->status)); + pgBackup *backup = (pgBackup *) parray_get(backups, i); - full_backup = backup; - full_backup_idx = i; + /* skip unsuitable candidates */ + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE) + continue; - /* Found target and full backups, so break the loop */ - break; + if (backup->parent_backup == full_backup->start_time) + { + dest_backup = backup; + break; + } } + + /* sanity */ + if (dest_backup == NULL) + elog(ERROR, "Failed to find merge candidate, " + "backup %s has no valid children", + base36enc(full_backup->start_time)); + } + /* Case #2 */ + else if (full_backup->status == BACKUP_STATUS_MERGING) + { + /* + * MERGING - merge was ongoing at the moment of crash. + * We must find destination backup and rerun merge. + * If destination backup is missing, then merge must be aborted, + * there is no recovery from this situation. + */ + + if (full_backup->merge_dest_backup == INVALID_BACKUP_ID) + elog(ERROR, "Failed to determine merge destination backup"); - prev_parent = backup->parent_backup; + /* look up destination backup */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backups, i); + + if (backup->start_time == full_backup->merge_dest_backup) + { + dest_backup = backup; + break; + } + } + if (!dest_backup) + { + char *tmp_backup_id = base36enc_dup(full_backup->start_time); + elog(ERROR, "Full backup %s has unfinished merge with missing backup %s", + tmp_backup_id, + base36enc(full_backup->merge_dest_backup)); + pg_free(tmp_backup_id); + } + } + else if (full_backup->status == BACKUP_STATUS_MERGED) + { + /* + * MERGED - merge crashed after files were transfered, but + * before rename could take place. + * If destination backup is missing, this is ok. + * If destination backup is present, then it should be deleted. + * After that FULL backup must acquire destination backup ID. 
+ */ + + /* destination backup may or may not exists */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backups, i); + + if (backup->start_time == full_backup->merge_dest_backup) + { + dest_backup = backup; + break; + } + } + if (!dest_backup) + { + char *tmp_backup_id = base36enc_dup(full_backup->start_time); + elog(WARNING, "Full backup %s has unfinished merge with missing backup %s", + tmp_backup_id, + base36enc(full_backup->merge_dest_backup)); + pg_free(tmp_backup_id); + } + } + else + elog(ERROR, "Backup %s has status: %s", + base36enc(full_backup->start_time), + status2str(full_backup->status)); } + else + { + /* + * Legal Case #1: + * PAGE2 OK <- target + * PAGE1 OK + * FULL OK + * Legal Case #2: + * PAGE2 MERGING <- target + * PAGE1 MERGING + * FULL MERGING + * Legal Case #3: + * PAGE2 MERGING <- target + * PAGE1 DELETING + * FULL MERGED + * Legal Case #4: + * PAGE2 MERGING <- target + * PAGE1 missing + * FULL MERGED + * Legal Case #5: + * PAGE2 DELETING <- target + * FULL MERGED + * Legal Case #6: + * PAGE2 MERGING <- target + * PAGE1 missing + * FULL MERGED + * Illegal Case #7: + * PAGE2 MERGING <- target + * PAGE1 missing + * FULL MERGING + */ - if (dest_backup == NULL) - elog(ERROR, "Target backup %s was not found", base36enc(backup_id)); + if (dest_backup->status == BACKUP_STATUS_MERGING || + dest_backup->status == BACKUP_STATUS_DELETING) + elog(WARNING, "Rerun unfinished merge for backup %s", + base36enc(dest_backup->start_time)); + + /* First we should try to find parent FULL backup */ + full_backup = find_parent_full_backup(dest_backup); + + /* Chain is broken, one or more member of parent chain is missing */ + if (full_backup == NULL) + { + /* It is the legal state of affairs in Case #4, but + * only for MERGING incremental target backup and only + * if FULL backup has MERGED status. 
+ */ + if (dest_backup->status != BACKUP_STATUS_MERGING) + elog(ERROR, "Failed to find parent full backup for %s", + base36enc(dest_backup->start_time)); + + /* Find FULL backup that has unfinished merge with dest backup */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backups, i); + + if (backup->merge_dest_backup == dest_backup->start_time) + { + full_backup = backup; + break; + } + } + + if (!full_backup) + elog(ERROR, "Failed to find full backup that has unfinished merge" + "with backup %s, cannot rerun merge", + base36enc(dest_backup->start_time)); + + if (full_backup->status == BACKUP_STATUS_MERGED) + elog(WARNING, "Incremental chain is broken, try to recover unfinished merge"); + else + elog(ERROR, "Incremental chain is broken, merge is impossible to finish"); + } + else + { + if ((full_backup->status == BACKUP_STATUS_MERGED || + full_backup->status == BACKUP_STATUS_MERGED) && + dest_backup->start_time != full_backup->merge_dest_backup) + { + char *tmp_backup_id = base36enc_dup(full_backup->start_time); + elog(ERROR, "Full backup %s has unfinished merge with backup %s", + tmp_backup_id, base36enc(full_backup->merge_dest_backup)); + pg_free(tmp_backup_id); + } + + } + } + + /* sanity */ if (full_backup == NULL) elog(ERROR, "Parent full backup for the given backup %s was not found", base36enc(backup_id)); - Assert(full_backup_idx != dest_backup_idx); - - /* - * Found target and full backups, merge them and intermediate backups + /* At this point NULL as dest_backup is allowed only in case of full backup + * having status MERGED */ + if (dest_backup == NULL && full_backup->status != BACKUP_STATUS_MERGED) + elog(ERROR, "Cannot run merge for full backup %s", + base36enc(full_backup->start_time)); + + /* sanity */ + if (full_backup->status != BACKUP_STATUS_OK && + full_backup->status != BACKUP_STATUS_DONE && + /* It is possible that previous merging was interrupted */ + full_backup->status != BACKUP_STATUS_MERGED && + full_backup->status != BACKUP_STATUS_MERGING) + elog(ERROR, "Backup %s has status: %s", + base36enc(full_backup->start_time), status2str(full_backup->status)); + + /* Form merge list */ + dest_backup_tmp = dest_backup; + + /* While loop below may looks strange, it is done so on purpose + * to handle both whole and broken incremental chains. 
*/ - for (i = full_backup_idx; i > dest_backup_idx; i--) + while (dest_backup_tmp) { - pgBackup *to_backup = (pgBackup *) parray_get(backups, i); - pgBackup *from_backup = (pgBackup *) parray_get(backups, i - 1); - - merge_backups(to_backup, from_backup); + /* sanity */ + if (dest_backup_tmp->status != BACKUP_STATUS_OK && + dest_backup_tmp->status != BACKUP_STATUS_DONE && + /* It is possible that previous merging was interrupted */ + dest_backup_tmp->status != BACKUP_STATUS_MERGING && + dest_backup_tmp->status != BACKUP_STATUS_MERGED && + dest_backup_tmp->status != BACKUP_STATUS_DELETING) + elog(ERROR, "Backup %s has status: %s", + base36enc(dest_backup_tmp->start_time), + status2str(dest_backup_tmp->status)); + + if (dest_backup_tmp->backup_mode == BACKUP_MODE_FULL) + break; + + parray_append(merge_list, dest_backup_tmp); + dest_backup_tmp = dest_backup_tmp->parent_backup_link; } + /* Add FULL backup */ + parray_append(merge_list, full_backup); + + /* Lock merge chain */ + catalog_lock_backup_list(merge_list, parray_num(merge_list) - 1, 0, true); + + /* do actual merge */ + merge_chain(merge_list, full_backup, dest_backup); + + pgBackupValidate(full_backup, NULL); + if (full_backup->status == BACKUP_STATUS_CORRUPT) + elog(ERROR, "Merging of backup %s failed", base36enc(backup_id)); + /* cleanup */ parray_walk(backups, pgBackupFree); parray_free(backups); + parray_free(merge_list); - elog(LOG, "Merge completed"); + elog(INFO, "Merge of backup %s completed", base36enc(backup_id)); } /* - * Merge two backups data files using threads. - * - move instance files from from_backup to to_backup - * - remove unnecessary directories and files from to_backup - * - update metadata of from_backup, it becames FULL backup + * Merge backup chain. + * dest_backup - incremental backup. + * parent_chain - array of backups starting with dest_backup and + * ending with full_backup. + * + * Copy backup files from incremental backups from parent_chain into + * full backup directory. + * Remove unnecessary directories and files from full backup directory. + * Update metadata of full backup to represent destination backup. + * + * TODO: stop relying on caller to provide valid parent_chain, make sure + * that chain is ok. 
*/ -static void -merge_backups(pgBackup *to_backup, pgBackup *from_backup) +void +merge_chain(parray *parent_chain, pgBackup *full_backup, pgBackup *dest_backup) { - char *to_backup_id = base36enc_dup(to_backup->start_time), - *from_backup_id = base36enc_dup(from_backup->start_time); - char to_backup_path[MAXPGPATH], - to_database_path[MAXPGPATH], - from_backup_path[MAXPGPATH], - from_database_path[MAXPGPATH], - control_file[MAXPGPATH]; - parray *files, - *to_files; - pthread_t *threads; - merge_files_arg *threads_args; int i; + char *dest_backup_id; + char full_external_prefix[MAXPGPATH]; + char full_database_dir[MAXPGPATH]; + parray *full_externals = NULL, + *dest_externals = NULL; + + parray *result_filelist = NULL; + bool use_bitmap = true; + bool is_retry = false; +// size_t total_in_place_merge_bytes = 0; + + pthread_t *threads = NULL; + merge_files_arg *threads_args = NULL; + time_t merge_time; bool merge_isok = true; + /* for fancy reporting */ + time_t end_time; + char pretty_time[20]; + /* in-place merge flags */ + bool compression_match = false; + bool program_version_match = false; + /* It's redundant to check block checksumms during merge */ + skip_block_validation = true; + + /* Handle corner cases of missing destination backup */ + if (dest_backup == NULL && + full_backup->status == BACKUP_STATUS_MERGED) + goto merge_rename; + + if (!dest_backup) + elog(ERROR, "Destination backup is missing, cannot continue merge"); + + if (dest_backup->status == BACKUP_STATUS_MERGING || + full_backup->status == BACKUP_STATUS_MERGING || + full_backup->status == BACKUP_STATUS_MERGED) + { + is_retry = true; + elog(INFO, "Retry failed merge of backup %s with parent chain", base36enc(dest_backup->start_time)); + } + else + elog(INFO, "Merging backup %s with parent chain", base36enc(dest_backup->start_time)); - elog(LOG, "Merging backup %s with backup %s", from_backup_id, to_backup_id); + /* sanity */ + if (full_backup->merge_dest_backup != INVALID_BACKUP_ID && + full_backup->merge_dest_backup != dest_backup->start_time) + { + char *merge_dest_backup_current = base36enc_dup(dest_backup->start_time); + char *merge_dest_backup = base36enc_dup(full_backup->merge_dest_backup); - to_backup->status = BACKUP_STATUS_MERGING; - pgBackupWriteBackupControlFile(to_backup); + elog(ERROR, "Cannot run merge for %s, because full backup %s has " + "unfinished merge with backup %s", + merge_dest_backup_current, + base36enc(full_backup->start_time), + merge_dest_backup); - from_backup->status = BACKUP_STATUS_MERGING; - pgBackupWriteBackupControlFile(from_backup); + pg_free(merge_dest_backup_current); + pg_free(merge_dest_backup); + } /* - * Make backup paths. + * Previous merging was interrupted during deleting source backup. It is + * safe just to delete it again. 
*/ - pgBackupGetPath(to_backup, to_backup_path, lengthof(to_backup_path), NULL); - pgBackupGetPath(to_backup, to_database_path, lengthof(to_database_path), - DATABASE_DIR); - pgBackupGetPath(from_backup, from_backup_path, lengthof(from_backup_path), NULL); - pgBackupGetPath(from_backup, from_database_path, lengthof(from_database_path), - DATABASE_DIR); + if (full_backup->status == BACKUP_STATUS_MERGED) + goto merge_delete; - create_data_directories(to_database_path, from_backup_path, false); + /* Forward compatibility is not supported */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + if (parse_program_version(backup->program_version) > + parse_program_version(PROGRAM_VERSION)) + { + elog(ERROR, "Backup %s has been produced by pg_probackup version %s, " + "but current program version is %s. Forward compatibility " + "is not supported.", + base36enc(backup->start_time), + backup->program_version, + PROGRAM_VERSION); + } + } + + /* If destination backup compression algorithm differs from + * full backup compression algorithm, then in-place merge is + * not possible. + */ + if (full_backup->compress_alg == dest_backup->compress_alg) + compression_match = true; + else + elog(WARNING, "In-place merge is disabled because of compression " + "algorithms mismatch"); /* - * Get list of files which will be modified or removed. + * If current program version differs from destination backup version, + * then in-place merge is not possible. */ - pgBackupGetPath(to_backup, control_file, lengthof(control_file), - DATABASE_FILE_LIST); - to_files = dir_read_file_list(from_database_path, /* Use from_database_path - * so root path will be - * equal with 'files' */ - control_file); - /* To delete from leaf, sort in reversed order */ - parray_qsort(to_files, pgFileComparePathDesc); + if ((parse_program_version(full_backup->program_version) == + parse_program_version(dest_backup->program_version)) && + (parse_program_version(dest_backup->program_version) == + parse_program_version(PROGRAM_VERSION))) + program_version_match = true; + else + elog(WARNING, "In-place merge is disabled because of program " + "versions mismatch. Full backup version: %s, " + "destination backup version: %s, " + "current program version: %s", + full_backup->program_version, + dest_backup->program_version, + PROGRAM_VERSION); + + /* Forbid merge retry for failed merges between 2.4.0 and any + * older version. Several format changes makes it impossible + * to determine the exact format any speific file is got. + */ + if (is_retry && + parse_program_version(dest_backup->program_version) >= 20400 && + parse_program_version(full_backup->program_version) < 20400) + { + elog(ERROR, "Retry of failed merge for backups with different between minor " + "versions is forbidden to avoid data corruption because of storage format " + "changes introduced in 2.4.0 version, please take a new full backup"); + } + /* - * Get list of files which need to be moved. + * Validate or revalidate all members of parent chain + * with sole exception of FULL backup. If it has MERGING status + * then it isn't valid backup until merging is finished. 
*/ - pgBackupGetPath(from_backup, control_file, lengthof(control_file), - DATABASE_FILE_LIST); - files = dir_read_file_list(from_database_path, control_file); - /* sort by size for load balancing */ - parray_qsort(files, pgFileCompareSize); + elog(INFO, "Validate parent chain for backup %s", + base36enc(dest_backup->start_time)); - threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); - threads_args = (merge_files_arg *) palloc(sizeof(merge_files_arg) * num_threads); + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + /* FULL backup is not to be validated if its status is MERGING */ + if (backup->backup_mode == BACKUP_MODE_FULL && + backup->status == BACKUP_STATUS_MERGING) + { + continue; + } + + pgBackupValidate(backup, NULL); + + if (backup->status != BACKUP_STATUS_OK) + elog(ERROR, "Backup %s has status %s, merge is aborted", + base36enc(backup->start_time), status2str(backup->status)); + } + + /* + * Get backup files. + */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + backup->files = get_backup_filelist(backup, true); + parray_qsort(backup->files, pgFileCompareRelPathWithExternal); + + /* Set MERGING status for every member of the chain */ + if (backup->backup_mode == BACKUP_MODE_FULL) + { + /* In case of FULL backup also remember backup_id of + * of destination backup we are merging with, so + * we can safely allow rerun merge in case of failure. + */ + backup->merge_dest_backup = dest_backup->start_time; + backup->status = BACKUP_STATUS_MERGING; + write_backup(backup, true); + } + else + write_backup_status(backup, BACKUP_STATUS_MERGING, instance_name, true); + } + + /* Construct path to database dir: /backup_dir/instance_name/FULL/database */ + join_path_components(full_database_dir, full_backup->root_dir, DATABASE_DIR); + /* Construct path to external dir: /backup_dir/instance_name/FULL/external */ + join_path_components(full_external_prefix, full_backup->root_dir, EXTERNAL_DIR); + + /* Create directories */ + create_data_directories(dest_backup->files, full_database_dir, + dest_backup->root_dir, false, false, FIO_BACKUP_HOST); + + /* External directories stuff */ + if (dest_backup->external_dir_str) + dest_externals = make_external_directory_list(dest_backup->external_dir_str, false); + if (full_backup->external_dir_str) + full_externals = make_external_directory_list(full_backup->external_dir_str, false); + /* + * Rename external directories in FULL backup (if exists) + * according to numeration of external dirs in destionation backup. 
+ */ + if (full_externals && dest_externals) + reorder_external_dirs(full_backup, full_externals, dest_externals); + + /* bitmap optimization rely on n_blocks, which is generally available since 2.3.0 */ + if (parse_program_version(dest_backup->program_version) < 20300) + use_bitmap = false; /* Setup threads */ - for (i = 0; i < parray_num(files); i++) + for (i = 0; i < parray_num(dest_backup->files); i++) { - pgFile *file = (pgFile *) parray_get(files, i); + pgFile *file = (pgFile *) parray_get(dest_backup->files, i); + + /* if the entry was an external directory, create it in the backup */ + if (file->external_dir_num && S_ISDIR(file->mode)) + { + char dirpath[MAXPGPATH]; + char new_container[MAXPGPATH]; + + makeExternalDirPathByNum(new_container, full_external_prefix, + file->external_dir_num); + join_path_components(dirpath, new_container, file->rel_path); + dir_create_dir(dirpath, DIR_PERMISSION); + } pg_atomic_init_flag(&file->lock); } + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (merge_files_arg *) palloc(sizeof(merge_files_arg) * num_threads); + + thread_interrupted = false; + merge_time = time(NULL); + elog(INFO, "Start merging backup files"); for (i = 0; i < num_threads; i++) { merge_files_arg *arg = &(threads_args[i]); - - arg->to_files = to_files; - arg->files = files; - arg->to_backup = to_backup; - arg->from_backup = from_backup; - arg->to_root = to_database_path; - arg->from_root = from_database_path; + arg->merge_filelist = parray_new(); + arg->parent_chain = parent_chain; + arg->dest_backup = dest_backup; + arg->full_backup = full_backup; + arg->full_database_dir = full_database_dir; + arg->full_external_prefix = full_external_prefix; + + arg->compression_match = compression_match; + arg->program_version_match = program_version_match; + arg->use_bitmap = use_bitmap; + arg->is_retry = is_retry; /* By default there are some error */ arg->ret = 1; @@ -236,290 +684,717 @@ merge_backups(pgBackup *to_backup, pgBackup *from_backup) } /* Wait threads */ + result_filelist = parray_new(); for (i = 0; i < num_threads; i++) { pthread_join(threads[i], NULL); if (threads_args[i].ret == 1) merge_isok = false; + + /* Compile final filelist */ + parray_concat(result_filelist, threads_args[i].merge_filelist); + + /* cleanup */ + parray_free(threads_args[i].merge_filelist); + //total_in_place_merge_bytes += threads_args[i].in_place_merge_bytes; } - if (!merge_isok) - elog(ERROR, "Data files merging failed"); - /* - * Files were copied into to_backup and deleted from from_backup. Remove - * remaining directories from from_backup. 
- */ - parray_qsort(files, pgFileComparePathDesc); - for (i = 0; i < parray_num(files); i++) + time(&end_time); + pretty_time_interval(difftime(end_time, merge_time), + pretty_time, lengthof(pretty_time)); + + if (merge_isok) + elog(INFO, "Backup files are successfully merged, time elapsed: %s", + pretty_time); + else + elog(ERROR, "Backup files merging failed, time elapsed: %s", + pretty_time); + + /* If temp header map is open, then close it and make rename */ + if (full_backup->hdr_map.fp) { - pgFile *file = (pgFile *) parray_get(files, i); + cleanup_header_map(&(full_backup->hdr_map)); - if (!S_ISDIR(file->mode)) - continue; + /* sync new header map to disk */ + if (fio_sync(full_backup->hdr_map.path_tmp, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Cannot sync temp header map \"%s\": %s", + full_backup->hdr_map.path_tmp, strerror(errno)); - if (rmdir(file->path)) - elog(ERROR, "Could not remove directory \"%s\": %s", - file->path, strerror(errno)); + /* Replace old header map with new one */ + if (rename(full_backup->hdr_map.path_tmp, full_backup->hdr_map.path)) + elog(ERROR, "Could not rename file \"%s\" to \"%s\": %s", + full_backup->hdr_map.path_tmp, full_backup->hdr_map.path, strerror(errno)); + } + + /* Close page header maps */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + cleanup_header_map(&(backup->hdr_map)); } - if (rmdir(from_database_path)) - elog(ERROR, "Could not remove directory \"%s\": %s", - from_database_path, strerror(errno)); - if (unlink(control_file)) - elog(ERROR, "Could not remove file \"%s\": %s", - control_file, strerror(errno)); - - pgBackupGetPath(from_backup, control_file, lengthof(control_file), - BACKUP_CONTROL_FILE); - if (unlink(control_file)) - elog(ERROR, "Could not remove file \"%s\": %s", - control_file, strerror(errno)); - - if (rmdir(from_backup_path)) - elog(ERROR, "Could not remove directory \"%s\": %s", - from_backup_path, strerror(errno)); /* - * Delete files which are not in from_backup file list. + * Update FULL backup metadata. + * We cannot set backup status to OK just yet, + * because it still has old start_time. + */ + StrNCpy(full_backup->program_version, PROGRAM_VERSION, + sizeof(full_backup->program_version)); + full_backup->parent_backup = INVALID_BACKUP_ID; + full_backup->start_lsn = dest_backup->start_lsn; + full_backup->stop_lsn = dest_backup->stop_lsn; + full_backup->recovery_time = dest_backup->recovery_time; + full_backup->recovery_xid = dest_backup->recovery_xid; + full_backup->tli = dest_backup->tli; + full_backup->from_replica = dest_backup->from_replica; + + pfree(full_backup->external_dir_str); + full_backup->external_dir_str = pgut_strdup(dest_backup->external_dir_str); + pfree(full_backup->primary_conninfo); + full_backup->primary_conninfo = pgut_strdup(dest_backup->primary_conninfo); + + full_backup->merge_time = merge_time; + full_backup->end_time = time(NULL); + + full_backup->compress_alg = dest_backup->compress_alg; + full_backup->compress_level = dest_backup->compress_level; + + /* If incremental backup is pinned, + * then result FULL backup must also be pinned. + * And reverse, if FULL backup was pinned and dest was not, + * then pinning is no more. + */ + full_backup->expire_time = dest_backup->expire_time; + + pg_free(full_backup->note); + full_backup->note = NULL; + + if (dest_backup->note) + full_backup->note = pgut_strdup(dest_backup->note); + + /* FULL backup must inherit wal mode. 
*/ + full_backup->stream = dest_backup->stream; + + /* ARCHIVE backup must inherit wal_bytes too. + * STREAM backup will have its wal_bytes calculated by + * write_backup_filelist(). */ - for (i = 0; i < parray_num(to_files); i++) + if (!dest_backup->stream) + full_backup->wal_bytes = dest_backup->wal_bytes; + + parray_qsort(result_filelist, pgFileCompareRelPathWithExternal); + + write_backup_filelist(full_backup, result_filelist, full_database_dir, NULL, true); + write_backup(full_backup, true); + + /* Delete FULL backup files, that do not exists in destination backup + * Both arrays must be sorted in in reversed order to delete from leaf + */ + parray_qsort(dest_backup->files, pgFileCompareRelPathWithExternalDesc); + parray_qsort(full_backup->files, pgFileCompareRelPathWithExternalDesc); + for (i = 0; i < parray_num(full_backup->files); i++) { - pgFile *file = (pgFile *) parray_get(to_files, i); + pgFile *full_file = (pgFile *) parray_get(full_backup->files, i); - if (parray_bsearch(files, file, pgFileComparePathDesc) == NULL) + if (full_file->external_dir_num && full_externals) { - pgFileDelete(file); - elog(LOG, "Deleted \"%s\"", file->path); + char *dir_name = parray_get(full_externals, full_file->external_dir_num - 1); + if (backup_contains_external(dir_name, full_externals)) + /* Dir already removed*/ + continue; } + + if (parray_bsearch(dest_backup->files, full_file, pgFileCompareRelPathWithExternalDesc) == NULL) + { + char full_file_path[MAXPGPATH]; + + /* We need full path, file object has relative path */ + join_path_components(full_file_path, full_database_dir, full_file->rel_path); + + pgFileDelete(full_file->mode, full_file_path); + elog(VERBOSE, "Deleted \"%s\"", full_file_path); + } + } + + /* Critical section starts. + * Change status of FULL backup. + * Files are merged into FULL backup. It is time to remove incremental chain. + */ + full_backup->status = BACKUP_STATUS_MERGED; + write_backup(full_backup, true); + +merge_delete: + for (i = parray_num(parent_chain) - 2; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + delete_backup_files(backup); } /* - * Rename FULL backup directory. + * PAGE2 DELETED + * PAGE1 DELETED + * FULL MERGED + * If we crash now, automatic rerun of failed merge is still possible: + * The user should start merge with full backup ID as an argument to option '-i'. */ - if (rename(to_backup_path, from_backup_path) == -1) - elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", - to_backup_path, from_backup_path, strerror(errno)); +merge_rename: /* - * Update to_backup metadata. + * Rename FULL backup directory to destination backup directory. 
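/*
 * Illustrative sketch, not part of the patch. The deletion loop above depends
 * on both filelists being sorted by path in descending order, so that entries
 * located inside a directory ("base/1/1234") are visited and removed before
 * the directory itself ("base/1"). A tiny illustration of that ordering with
 * plain qsort():
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
compare_path_desc(const void *a, const void *b)
{
    /* reversed strcmp: deeper paths under a directory sort first */
    return strcmp(*(const char *const *) b, *(const char *const *) a);
}

int
main(void)
{
    const char *paths[] = {"base/1", "base/1/1234", "base/1/1234.1", "base"};
    int         i;

    qsort(paths, 4, sizeof(paths[0]), compare_path_desc);

    /* prints base/1/1234.1, base/1/1234, base/1, base - leaves first */
    for (i = 0; i < 4; i++)
        printf("%s\n", paths[i]);
    return 0;
}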
*/ - pgBackupCopy(to_backup, from_backup); - /* Correct metadata */ - to_backup->backup_mode = BACKUP_MODE_FULL; - to_backup->status = BACKUP_STATUS_OK; - to_backup->parent_backup = INVALID_BACKUP_ID; - /* Compute summary of size of regular files in the backup */ - to_backup->data_bytes = 0; - for (i = 0; i < parray_num(files); i++) + if (dest_backup) { - pgFile *file = (pgFile *) parray_get(files, i); - - if (S_ISDIR(file->mode)) - to_backup->data_bytes += 4096; - /* Count the amount of the data actually copied */ - else if (S_ISREG(file->mode)) - to_backup->data_bytes += file->write_size; + elog(LOG, "Rename %s to %s", full_backup->root_dir, dest_backup->root_dir); + if (rename(full_backup->root_dir, dest_backup->root_dir) == -1) + elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", + full_backup->root_dir, dest_backup->root_dir, strerror(errno)); + + /* update root_dir after rename */ + pg_free(full_backup->root_dir); + full_backup->root_dir = pgut_strdup(dest_backup->root_dir); } - /* compute size of wal files of this backup stored in the archive */ - if (!current.stream) - to_backup->wal_bytes = XLOG_SEG_SIZE * - (to_backup->stop_lsn / XLogSegSize - to_backup->start_lsn / XLogSegSize + 1); else - to_backup->wal_bytes = BYTES_INVALID; + { + /* Ugly */ + char backups_dir[MAXPGPATH]; + char instance_dir[MAXPGPATH]; + char destination_path[MAXPGPATH]; + + join_path_components(backups_dir, backup_path, BACKUPS_DIR); + join_path_components(instance_dir, backups_dir, instance_name); + join_path_components(destination_path, instance_dir, + base36enc(full_backup->merge_dest_backup)); + + elog(LOG, "Rename %s to %s", full_backup->root_dir, destination_path); + if (rename(full_backup->root_dir, destination_path) == -1) + elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", + full_backup->root_dir, destination_path, strerror(errno)); + + /* update root_dir after rename */ + pg_free(full_backup->root_dir); + full_backup->root_dir = pgut_strdup(destination_path); + } + + /* Reinit path to database_dir */ + join_path_components(full_backup->database_dir, full_backup->root_dir, DATABASE_DIR); + + /* If we crash here, it will produce full backup in MERGED + * status, located in directory with wrong backup id. + * It should not be a problem. 
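/*
 * Illustrative sketch, not part of the patch. The destination directory name
 * is produced by base36enc(full_backup->merge_dest_backup), i.e. a backup ID
 * is a base-36 rendering of a numeric value. A minimal sketch of such an
 * encoder (the project's base36enc() may differ in buffer handling and digit
 * case):
 */
#include <stdint.h>
#include <stdio.h>

static void
to_base36(uint64_t value, char *buf, size_t buflen)
{
    const char  digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    char        tmp[32];
    int         len = 0;
    int         i;

    /* emit digits least-significant first, then reverse into buf */
    do
    {
        tmp[len++] = digits[value % 36];
        value /= 36;
    } while (value > 0 && len < (int) sizeof(tmp));

    if ((size_t) len >= buflen)
        len = (int) buflen - 1;
    for (i = 0; i < len; i++)
        buf[i] = tmp[len - 1 - i];
    buf[len] = '\0';
}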
+ */ + + /* + * Merging finished, now we can safely update ID of the FULL backup + */ + dest_backup_id = base36enc_dup(full_backup->merge_dest_backup); + elog(INFO, "Rename merged full backup %s to %s", + base36enc(full_backup->start_time), dest_backup_id); - pgBackupWriteFileList(to_backup, files, from_database_path); - pgBackupWriteBackupControlFile(to_backup); + full_backup->status = BACKUP_STATUS_OK; + full_backup->start_time = full_backup->merge_dest_backup; + full_backup->merge_dest_backup = INVALID_BACKUP_ID; + write_backup(full_backup, true); + /* Critical section end */ /* Cleanup */ - pfree(threads_args); - pfree(threads); + pg_free(dest_backup_id); + if (threads) + { + pfree(threads_args); + pfree(threads); + } - parray_walk(to_files, pgFileFree); - parray_free(to_files); + if (result_filelist && parray_num(result_filelist) > 0) + { + parray_walk(result_filelist, pgFileFree); + parray_free(result_filelist); + } - parray_walk(files, pgFileFree); - parray_free(files); + if (dest_externals != NULL) + free_dir_list(dest_externals); + + if (full_externals != NULL) + free_dir_list(full_externals); - pfree(to_backup_id); - pfree(from_backup_id); + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + if (backup->files) + { + parray_walk(backup->files, pgFileFree); + parray_free(backup->files); + } + } } /* - * Thread worker of merge_backups(). + * Thread worker of merge_chain(). */ static void * merge_files(void *arg) { - merge_files_arg *argument = (merge_files_arg *) arg; - pgBackup *to_backup = argument->to_backup; - pgBackup *from_backup = argument->from_backup; - char tmp_file_path[MAXPGPATH]; - int i, - num_files = parray_num(argument->files); - int to_root_len = strlen(argument->to_root); - - if (to_backup->compress_alg == PGLZ_COMPRESS || - to_backup->compress_alg == ZLIB_COMPRESS) - join_path_components(tmp_file_path, argument->to_root, "tmp"); - - for (i = 0; i < num_files; i++) + int i; + merge_files_arg *arguments = (merge_files_arg *) arg; + size_t n_files = parray_num(arguments->dest_backup->files); + + for (i = 0; i < n_files; i++) { - pgFile *file = (pgFile *) parray_get(argument->files, i); + pgFile *dest_file = (pgFile *) parray_get(arguments->dest_backup->files, i); + pgFile *tmp_file; + bool in_place = false; /* keep file as it is */ - if (!pg_atomic_test_set_flag(&file->lock)) + /* check for interrupt */ + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during merge"); + + if (!pg_atomic_test_set_flag(&dest_file->lock)) continue; - /* check for interrupt */ - if (interrupted) - elog(ERROR, "Interrupted during merging backups"); + tmp_file = pgFileInit(dest_file->rel_path); + tmp_file->mode = dest_file->mode; + tmp_file->is_datafile = dest_file->is_datafile; + tmp_file->is_cfs = dest_file->is_cfs; + tmp_file->external_dir_num = dest_file->external_dir_num; + tmp_file->dbOid = dest_file->dbOid; + + /* Directories were created before */ + if (S_ISDIR(dest_file->mode)) + goto done; if (progress) - elog(LOG, "Progress: (%d/%d). Process file \"%s\"", - i + 1, num_files, file->path); + elog(INFO, "Progress: (%d/%lu). 
Merging file \"%s\"", + i + 1, n_files, dest_file->rel_path); + + if (dest_file->is_datafile && !dest_file->is_cfs) + tmp_file->segno = dest_file->segno; + + // If destination file is 0 sized, then go for the next + if (dest_file->write_size == 0) + { + if (!dest_file->is_datafile || dest_file->is_cfs) + tmp_file->crc = dest_file->crc; + + tmp_file->write_size = 0; + goto done; + } /* - * Skip files which haven't changed since previous backup. But in case - * of DELTA backup we should consider n_blocks to truncate the target - * backup. + * If file didn`t changed over the course of all incremental chain, + * then do in-place merge, unless destination backup has + * different compression algorithm. + * In-place merge is also impossible, if program version of destination + * backup differs from PROGRAM_VERSION */ - if (file->write_size == BYTES_INVALID && - file->n_blocks == -1) + if (arguments->program_version_match && arguments->compression_match && + !arguments->is_retry) { - elog(VERBOSE, "Skip merging file \"%s\", the file didn't change", - file->path); - /* - * If the file wasn't changed in PAGE backup, retreive its - * write_size from previous FULL backup. + * Case 1: + * in this case in place merge is possible: + * 0 PAGE; file, size BYTES_INVALID + * 1 PAGE; file, size BYTES_INVALID + * 2 FULL; file, size 100500 + * + * Case 2: + * in this case in place merge is possible: + * 0 PAGE; file, size 0 + * 1 PAGE; file, size 0 + * 2 FULL; file, size 100500 + * + * Case 3: + * in this case in place merge is impossible: + * 0 PAGE; file, size BYTES_INVALID + * 1 PAGE; file, size 100501 + * 2 FULL; file, size 100500 + * + * Case 4 (good candidate for future optimization): + * in this case in place merge is impossible: + * 0 PAGE; file, size BYTES_INVALID + * 1 PAGE; file, size 100501 + * 2 FULL; file, not exists yet */ - if (S_ISREG(file->mode)) + + in_place = true; + + for (i = parray_num(arguments->parent_chain) - 1; i >= 0; i--) { - pgFile **res_file; + pgFile **res_file = NULL; + pgFile *file = NULL; + + pgBackup *backup = (pgBackup *) parray_get(arguments->parent_chain, i); + + /* lookup file in intermediate backup */ + res_file = parray_bsearch(backup->files, dest_file, pgFileCompareRelPathWithExternal); + file = (res_file) ? *res_file : NULL; + + /* Destination file is not exists yet, + * in-place merge is impossible + */ + if (file == NULL) + { + in_place = false; + break; + } - res_file = parray_bsearch(argument->to_files, file, - pgFileComparePathDesc); - if (res_file && *res_file) + /* Skip file from FULL backup */ + if (backup->backup_mode == BACKUP_MODE_FULL) + continue; + + if (file->write_size != BYTES_INVALID) { - file->compress_alg = (*res_file)->compress_alg; - file->write_size = (*res_file)->write_size; - file->crc = (*res_file)->crc; + in_place = false; + break; } } - - continue; } - /* Directories were created before */ - if (S_ISDIR(file->mode)) - continue; - /* - * Move the file. We need to decompress it and compress again if - * necessary. + * In-place merge means that file in FULL backup stays as it is, + * no additional actions are required. + * page header map cannot be trusted when retrying, so no + * in place merge for retry. 
*/ - elog(VERBOSE, "Moving file \"%s\", is_datafile %d, is_cfs %d", - file->path, file->is_database, file->is_cfs); - - if (file->is_datafile && !file->is_cfs) + if (in_place) { - char to_path_tmp[MAXPGPATH]; /* Path of target file */ + pgFile **res_file = NULL; + pgFile *file = NULL; + res_file = parray_bsearch(arguments->full_backup->files, dest_file, + pgFileCompareRelPathWithExternal); + file = (res_file) ? *res_file : NULL; + + /* If file didn`t changed in any way, then in-place merge is possible */ + if (file && + file->n_blocks == dest_file->n_blocks) + { + BackupPageHeader2 *headers = NULL; - join_path_components(to_path_tmp, argument->to_root, - file->path + to_root_len + 1); + elog(VERBOSE, "The file didn`t changed since FULL backup, skip merge: \"%s\"", + file->rel_path); - /* - * We need more complicate algorithm if target file exists and it is - * compressed. - */ - if (to_backup->compress_alg == PGLZ_COMPRESS || - to_backup->compress_alg == ZLIB_COMPRESS) - { - char *prev_path; + tmp_file->crc = file->crc; + tmp_file->write_size = file->write_size; - /* Start the magic */ + if (dest_file->is_datafile && !dest_file->is_cfs) + { + tmp_file->n_blocks = file->n_blocks; + tmp_file->compress_alg = file->compress_alg; + tmp_file->uncompressed_size = file->n_blocks * BLCKSZ; - /* - * Merge files: - * - decompress first file - * - decompress second file and merge with first decompressed file - * - compress result file - */ + tmp_file->n_headers = file->n_headers; + tmp_file->hdr_crc = file->hdr_crc; + } + else + tmp_file->uncompressed_size = tmp_file->write_size; - elog(VERBOSE, "File is compressed, decompress to the temporary file \"%s\"", - tmp_file_path); + /* Copy header metadata from old map into a new one */ + tmp_file->n_headers = file->n_headers; + headers = get_data_file_headers(&(arguments->full_backup->hdr_map), file, + parse_program_version(arguments->full_backup->program_version), + true); - prev_path = file->path; - /* - * We need to decompress target file only if it exists. - */ - if (fileExists(to_path_tmp)) - { - /* - * file->path points to the file in from_root directory. But we - * need the file in directory to_root. - */ - file->path = to_path_tmp; + /* sanity */ + if (!headers && file->n_headers > 0) + elog(ERROR, "Failed to get headers for file \"%s\"", file->rel_path); - /* Decompress first/target file */ - restore_data_file(tmp_file_path, file, false, false); + write_page_headers(headers, tmp_file, &(arguments->full_backup->hdr_map), true); + pg_free(headers); - file->path = prev_path; - } - /* Merge second/source file with first/target file */ - restore_data_file(tmp_file_path, file, - from_backup->backup_mode == BACKUP_MODE_DIFF_DELTA, - false); - - elog(VERBOSE, "Compress file and save it to the directory \"%s\"", - argument->to_root); - - /* Again we need change path */ - file->path = tmp_file_path; - /* backup_data_file() requires file size to calculate nblocks */ - file->size = pgFileSize(file->path); - /* Now we can compress the file */ - backup_data_file(NULL, /* We shouldn't need 'arguments' here */ - to_path_tmp, file, - to_backup->start_lsn, - to_backup->backup_mode, - to_backup->compress_alg, - to_backup->compress_level); - - file->path = prev_path; - - /* We can remove temporary file now */ - if (unlink(tmp_file_path)) - elog(ERROR, "Could not remove temporary file \"%s\": %s", - tmp_file_path, strerror(errno)); + //TODO: report in_place merge bytes. + goto done; } - /* - * Otherwise merging algorithm is simpler. 
- */ - else - { - /* We can merge in-place here */ - restore_data_file(to_path_tmp, file, - from_backup->backup_mode == BACKUP_MODE_DIFF_DELTA, - true); - - /* - * We need to calculate write_size, restore_data_file() doesn't - * do that. - */ - file->write_size = pgFileSize(to_path_tmp); - file->crc = pgFileGetCRC(to_path_tmp); - } - pgFileDelete(file); } - else - move_file(argument->from_root, argument->to_root, file); - if (file->write_size != BYTES_INVALID) - elog(LOG, "Moved file \"%s\": " INT64_FORMAT " bytes", - file->path, file->write_size); + if (dest_file->is_datafile && !dest_file->is_cfs) + merge_data_file(arguments->parent_chain, + arguments->full_backup, + arguments->dest_backup, + dest_file, tmp_file, + arguments->full_database_dir, + arguments->use_bitmap, + arguments->is_retry); + else + merge_non_data_file(arguments->parent_chain, + arguments->full_backup, + arguments->dest_backup, + dest_file, tmp_file, + arguments->full_database_dir, + arguments->full_external_prefix); + +done: + parray_append(arguments->merge_filelist, tmp_file); } /* Data files merging is successful */ - argument->ret = 0; + arguments->ret = 0; return NULL; } + +/* Recursively delete a directory and its contents */ +static void +remove_dir_with_files(const char *path) +{ + parray *files = parray_new(); + int i; + char full_path[MAXPGPATH]; + + dir_list_file(files, path, false, false, true, false, false, 0, FIO_LOCAL_HOST); + parray_qsort(files, pgFileCompareRelPathWithExternalDesc); + for (i = 0; i < parray_num(files); i++) + { + pgFile *file = (pgFile *) parray_get(files, i); + + join_path_components(full_path, path, file->rel_path); + + pgFileDelete(file->mode, full_path); + elog(VERBOSE, "Deleted \"%s\"", full_path); + } + + /* cleanup */ + parray_walk(files, pgFileFree); + parray_free(files); +} + +/* Get index of external directory */ +static int +get_external_index(const char *key, const parray *list) +{ + int i; + + if (!list) /* Nowhere to search */ + return -1; + for (i = 0; i < parray_num(list); i++) + { + if (strcmp(key, parray_get(list, i)) == 0) + return i + 1; + } + return -1; +} + +/* Rename directories in to_backup according to order in from_external */ +static void +reorder_external_dirs(pgBackup *to_backup, parray *to_external, + parray *from_external) +{ + char externaldir_template[MAXPGPATH]; + int i; + + join_path_components(externaldir_template, to_backup->root_dir, EXTERNAL_DIR); + for (i = 0; i < parray_num(to_external); i++) + { + int from_num = get_external_index(parray_get(to_external, i), + from_external); + if (from_num == -1) + { + char old_path[MAXPGPATH]; + makeExternalDirPathByNum(old_path, externaldir_template, i + 1); + remove_dir_with_files(old_path); + } + else if (from_num != i + 1) + { + char old_path[MAXPGPATH]; + char new_path[MAXPGPATH]; + makeExternalDirPathByNum(old_path, externaldir_template, i + 1); + makeExternalDirPathByNum(new_path, externaldir_template, from_num); + elog(VERBOSE, "Rename %s to %s", old_path, new_path); + if (rename (old_path, new_path) == -1) + elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", + old_path, new_path, strerror(errno)); + } + } +} + +/* Merge is usually happens as usual backup/restore via temp files, unless + * file didn`t changed since FULL backup AND full a dest backup have the + * same compression algorithm. In this case file can be left as it is. 
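/*
 * Illustrative sketch, not part of the patch. get_external_index() above
 * returns a 1-based position, matching external_dir_num where 0 means "not an
 * external file", and -1 when the directory is absent from the list. The same
 * lookup over a plain string array:
 */
#include <string.h>

static int
external_index(const char *key, const char *const *dirs, int ndirs)
{
    int     i;

    for (i = 0; i < ndirs; i++)
    {
        if (strcmp(key, dirs[i]) == 0)
            return i + 1;       /* 1-based, as in external_dir_num */
    }
    return -1;                  /* directory is not backed up anymore */
}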
+ */ +void +merge_data_file(parray *parent_chain, pgBackup *full_backup, + pgBackup *dest_backup, pgFile *dest_file, pgFile *tmp_file, + const char *full_database_dir, bool use_bitmap, bool is_retry) +{ + FILE *out = NULL; + char *buffer = pgut_malloc(STDIO_BUFSIZE); + char to_fullpath[MAXPGPATH]; + char to_fullpath_tmp1[MAXPGPATH]; /* used for restore */ + char to_fullpath_tmp2[MAXPGPATH]; /* used for backup */ + + /* The next possible optimization is copying "as is" the file + * from intermediate incremental backup, that didn`t changed in + * subsequent incremental backups. TODO. + */ + + /* set fullpath of destination file and temp files */ + join_path_components(to_fullpath, full_database_dir, tmp_file->rel_path); + snprintf(to_fullpath_tmp1, MAXPGPATH, "%s_tmp1", to_fullpath); + snprintf(to_fullpath_tmp2, MAXPGPATH, "%s_tmp2", to_fullpath); + + /* open temp file */ + out = fopen(to_fullpath_tmp1, PG_BINARY_W); + if (out == NULL) + elog(ERROR, "Cannot open merge target file \"%s\": %s", + to_fullpath_tmp1, strerror(errno)); + setvbuf(out, buffer, _IOFBF, STDIO_BUFSIZE); + + /* restore file into temp file */ + tmp_file->size = restore_data_file(parent_chain, dest_file, out, to_fullpath_tmp1, + use_bitmap, NULL, InvalidXLogRecPtr, NULL, + /* when retrying merge header map cannot be trusted */ + is_retry ? false : true); + if (fclose(out) != 0) + elog(ERROR, "Cannot close file \"%s\": %s", + to_fullpath_tmp1, strerror(errno)); + + pg_free(buffer); + + /* tmp_file->size is greedy, even if there is single 8KB block in file, + * that was overwritten twice during restore_data_file, we would assume that its size is + * 16KB. + * TODO: maybe we should just trust dest_file->n_blocks? + * No, we can`t, because current binary can be used to merge + * 2 backups of old versions, where n_blocks is missing. + */ + + backup_data_file(NULL, tmp_file, to_fullpath_tmp1, to_fullpath_tmp2, + InvalidXLogRecPtr, BACKUP_MODE_FULL, + dest_backup->compress_alg, dest_backup->compress_level, + dest_backup->checksum_version, 0, NULL, + &(full_backup->hdr_map), true); + + /* drop restored temp file */ + if (unlink(to_fullpath_tmp1) == -1) + elog(ERROR, "Cannot remove file \"%s\": %s", to_fullpath_tmp1, + strerror(errno)); + + /* + * In old (=<2.2.7) versions of pg_probackup n_blocks attribute of files + * in PAGE and PTRACK wasn`t filled. + */ + //Assert(tmp_file->n_blocks == dest_file->n_blocks); + + /* Backward compatibility kludge: + * When merging old backups, it is possible that + * to_fullpath_tmp2 size will be 0, and so it will be + * truncated in backup_data_file(). + * TODO: remove in 3.0.0 + */ + if (tmp_file->write_size == 0) + return; + + /* sync second temp file to disk */ + if (fio_sync(to_fullpath_tmp2, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Cannot sync merge temp file \"%s\": %s", + to_fullpath_tmp2, strerror(errno)); + + /* Do atomic rename from second temp file to destination file */ + if (rename(to_fullpath_tmp2, to_fullpath) == -1) + elog(ERROR, "Could not rename file \"%s\" to \"%s\": %s", + to_fullpath_tmp2, to_fullpath, strerror(errno)); + + /* drop temp file */ + unlink(to_fullpath_tmp1); +} + +/* + * For every destionation file lookup the newest file in chain and + * copy it. + * Additional pain is external directories. 
+ */ +void +merge_non_data_file(parray *parent_chain, pgBackup *full_backup, + pgBackup *dest_backup, pgFile *dest_file, pgFile *tmp_file, + const char *full_database_dir, const char *to_external_prefix) +{ + int i; + char to_fullpath[MAXPGPATH]; + char to_fullpath_tmp[MAXPGPATH]; /* used for backup */ + char from_fullpath[MAXPGPATH]; + pgBackup *from_backup = NULL; + pgFile *from_file = NULL; + + /* We need to make full path to destination file */ + if (dest_file->external_dir_num) + { + char temp[MAXPGPATH]; + makeExternalDirPathByNum(temp, to_external_prefix, + dest_file->external_dir_num); + join_path_components(to_fullpath, temp, dest_file->rel_path); + } + else + join_path_components(to_fullpath, full_database_dir, dest_file->rel_path); + + snprintf(to_fullpath_tmp, MAXPGPATH, "%s_tmp", to_fullpath); + + /* + * Iterate over parent chain starting from direct parent of destination + * backup to oldest backup in chain, and look for the first + * full copy of destination file. + * Full copy is latest possible destination file with size equal(!) + * or greater than zero. + */ + for (i = 0; i < parray_num(parent_chain); i++) + { + pgFile **res_file = NULL; + from_backup = (pgBackup *) parray_get(parent_chain, i); + + /* lookup file in intermediate backup */ + res_file = parray_bsearch(from_backup->files, dest_file, pgFileCompareRelPathWithExternal); + from_file = (res_file) ? *res_file : NULL; + + /* + * It should not be possible not to find source file in intermediate + * backup, without encountering full copy first. + */ + if (!from_file) + { + elog(ERROR, "Failed to locate nonedata file \"%s\" in backup %s", + dest_file->rel_path, base36enc(from_backup->start_time)); + continue; + } + + if (from_file->write_size > 0) + break; + } + + /* sanity */ + if (!from_backup) + elog(ERROR, "Failed to found a backup containing full copy of nonedata file \"%s\"", + dest_file->rel_path); + + if (!from_file) + elog(ERROR, "Failed to locate a full copy of nonedata file \"%s\"", dest_file->rel_path); + + /* set path to source file */ + if (from_file->external_dir_num) + { + char temp[MAXPGPATH]; + char external_prefix[MAXPGPATH]; + + join_path_components(external_prefix, from_backup->root_dir, EXTERNAL_DIR); + makeExternalDirPathByNum(temp, external_prefix, dest_file->external_dir_num); + + join_path_components(from_fullpath, temp, from_file->rel_path); + } + else + { + char backup_database_dir[MAXPGPATH]; + join_path_components(backup_database_dir, from_backup->root_dir, DATABASE_DIR); + join_path_components(from_fullpath, backup_database_dir, from_file->rel_path); + } + + /* Copy file to FULL backup directory into temp file */ + backup_non_data_file(tmp_file, NULL, from_fullpath, + to_fullpath_tmp, BACKUP_MODE_FULL, 0, false); + + /* sync temp file to disk */ + if (fio_sync(to_fullpath_tmp, FIO_BACKUP_HOST) != 0) + elog(ERROR, "Cannot sync merge temp file \"%s\": %s", + to_fullpath_tmp, strerror(errno)); + + /* Do atomic rename from second temp file to destination file */ + if (rename(to_fullpath_tmp, to_fullpath) == -1) + elog(ERROR, "Could not rename file \"%s\" to \"%s\": %s", + to_fullpath_tmp, to_fullpath, strerror(errno)); + +} diff --git a/src/parsexlog.c b/src/parsexlog.c index 3e5e87903..5a33d3045 100644 --- a/src/parsexlog.c +++ b/src/parsexlog.c @@ -5,23 +5,25 @@ * * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2015-2018, Postgres Professional + * Portions Copyright 
(c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ #include "pg_probackup.h" -#include -#include +#include "access/transam.h" +#include "catalog/pg_control.h" +#include "commands/dbcommands_xlog.h" +#include "catalog/storage_xlog.h" + #ifdef HAVE_LIBZ #include #endif -#include "commands/dbcommands_xlog.h" -#include "catalog/storage_xlog.h" -#include "access/transam.h" #include "utils/thread.h" +#include +#include /* * RmgrNames is an array of resource manager names, to make error messages @@ -81,38 +83,64 @@ typedef struct xl_xact_abort /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ } xl_xact_abort; -static void extractPageInfo(XLogReaderState *record); -static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime); +/* + * XLogRecTarget allows to track the last recovery targets. Currently used only + * within validate_wal(). + */ +typedef struct XLogRecTarget +{ + TimestampTz rec_time; + TransactionId rec_xid; + XLogRecPtr rec_lsn; +} XLogRecTarget; -typedef struct XLogPageReadPrivate +typedef struct XLogReaderData { - const char *archivedir; + int thread_num; TimeLineID tli; - bool manual_switch; + XLogRecTarget cur_rec; + XLogSegNo xlogsegno; + bool xlogexists; + + char page_buf[XLOG_BLCKSZ]; + uint32 prev_page_off; + bool need_switch; int xlogfile; - XLogSegNo xlogsegno; char xlogpath[MAXPGPATH]; - bool xlogexists; #ifdef HAVE_LIBZ - gzFile gz_xlogfile; - char gz_xlogpath[MAXPGPATH]; + gzFile gz_xlogfile; + char gz_xlogpath[MAXPGPATH]; #endif -} XLogPageReadPrivate; +} XLogReaderData; + +/* Function to process a WAL record */ +typedef void (*xlog_record_function) (XLogReaderState *record, + XLogReaderData *reader_data, + bool *stop_reading); /* An argument for a thread function */ typedef struct { - int thread_num; - XLogPageReadPrivate private_data; + XLogReaderData reader_data; + + xlog_record_function process_record; XLogRecPtr startpoint; XLogRecPtr endpoint; XLogSegNo endSegNo; + /* + * The thread got the recovery target. + */ + bool got_target; + + /* Should we read record, located at endpoint position */ + bool inclusive_endpoint; + /* * Return value from the thread. * 0 means there is no error, 1 - there is an error. 
@@ -124,278 +152,234 @@ static int SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *pageTLI); -static XLogReaderState *InitXLogPageRead(XLogPageReadPrivate *private_data, +static XLogReaderState *InitXLogPageRead(XLogReaderData *reader_data, const char *archivedir, - TimeLineID tli, bool allocate_reader); + TimeLineID tli, uint32 segment_size, + bool manual_switch, + bool consistent_read, + bool allocate_reader); +static bool RunXLogThreads(const char *archivedir, + time_t target_time, TransactionId target_xid, + XLogRecPtr target_lsn, + TimeLineID tli, uint32 segment_size, + XLogRecPtr startpoint, XLogRecPtr endpoint, + bool consistent_read, + xlog_record_function process_record, + XLogRecTarget *last_rec, + bool inclusive_endpoint); +//static XLogReaderState *InitXLogThreadRead(xlog_thread_arg *arg); +static bool SwitchThreadToNextWal(XLogReaderState *xlogreader, + xlog_thread_arg *arg); +static bool XLogWaitForConsistency(XLogReaderState *xlogreader); +static void *XLogThreadWorker(void *arg); static void CleanupXLogPageRead(XLogReaderState *xlogreader); -static void PrintXLogCorruptionMsg(XLogPageReadPrivate *private_data, - int elevel); +static void PrintXLogCorruptionMsg(XLogReaderData *reader_data, int elevel); + +static void extractPageInfo(XLogReaderState *record, + XLogReaderData *reader_data, bool *stop_reading); +static void validateXLogRecord(XLogReaderState *record, + XLogReaderData *reader_data, bool *stop_reading); +static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime); -static XLogSegNo nextSegNoToRead = 0; +static XLogSegNo segno_start = 0; +/* Segment number where target record is located */ +static XLogSegNo segno_target = 0; +/* Next segment number to read by a thread */ +static XLogSegNo segno_next = 0; +/* Number of segments already read by threads */ +static uint32 segnum_read = 0; +/* Number of detected corrupted or absent segments */ +static uint32 segnum_corrupted = 0; static pthread_mutex_t wal_segment_mutex = PTHREAD_MUTEX_INITIALIZER; -/* - * extractPageMap() worker. - */ -static void * -doExtractPageMap(void *arg) +/* copied from timestamp.c */ +static pg_time_t +timestamptz_to_time_t(TimestampTz t) { - xlog_thread_arg *extract_arg = (xlog_thread_arg *) arg; - XLogPageReadPrivate *private_data; - XLogReaderState *xlogreader; - XLogSegNo nextSegNo = 0; - char *errormsg; - - private_data = &extract_arg->private_data; - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, private_data); - if (xlogreader == NULL) - elog(ERROR, "out of memory"); - - extract_arg->startpoint = XLogFindNextRecord(xlogreader, - extract_arg->startpoint); - - elog(VERBOSE, "Start LSN of thread %d: %X/%X", - extract_arg->thread_num, - (uint32) (extract_arg->startpoint >> 32), - (uint32) (extract_arg->startpoint)); - - /* Switch WAL segment manually below without using SimpleXLogPageRead() */ - private_data->manual_switch = true; - - do - { - XLogRecord *record; - - if (interrupted) - elog(ERROR, "Interrupted during WAL reading"); - - record = XLogReadRecord(xlogreader, extract_arg->startpoint, &errormsg); - - if (record == NULL) - { - XLogRecPtr errptr; - - /* - * Try to switch to the next WAL segment. Usually - * SimpleXLogPageRead() does it by itself. But here we need to do it - * manually to support threads. 
- */ - if (private_data->need_switch) - { - private_data->need_switch = false; - - /* Critical section */ - pthread_lock(&wal_segment_mutex); - Assert(nextSegNoToRead); - private_data->xlogsegno = nextSegNoToRead; - nextSegNoToRead++; - pthread_mutex_unlock(&wal_segment_mutex); - - /* We reach the end */ - if (private_data->xlogsegno > extract_arg->endSegNo) - break; - - /* Adjust next record position */ - XLogSegNoOffsetToRecPtr(private_data->xlogsegno, 0, - extract_arg->startpoint); - /* Skip over the page header */ - extract_arg->startpoint = XLogFindNextRecord(xlogreader, - extract_arg->startpoint); - - elog(VERBOSE, "Thread %d switched to LSN %X/%X", - extract_arg->thread_num, - (uint32) (extract_arg->startpoint >> 32), - (uint32) (extract_arg->startpoint)); + pg_time_t result; - continue; - } - - errptr = extract_arg->startpoint ? - extract_arg->startpoint : xlogreader->EndRecPtr; - - if (errormsg) - elog(WARNING, "could not read WAL record at %X/%X: %s", - (uint32) (errptr >> 32), (uint32) (errptr), - errormsg); - else - elog(WARNING, "could not read WAL record at %X/%X", - (uint32) (errptr >> 32), (uint32) (errptr)); - - /* - * If we don't have all WAL files from prev backup start_lsn to current - * start_lsn, we won't be able to build page map and PAGE backup will - * be incorrect. Stop it and throw an error. - */ - PrintXLogCorruptionMsg(private_data, ERROR); - } - - extractPageInfo(xlogreader); - - /* continue reading at next record */ - extract_arg->startpoint = InvalidXLogRecPtr; - - XLByteToSeg(xlogreader->EndRecPtr, nextSegNo); - } while (nextSegNo <= extract_arg->endSegNo && - xlogreader->EndRecPtr < extract_arg->endpoint); +#ifdef HAVE_INT64_TIMESTAMP + result = (pg_time_t) (t / USECS_PER_SEC + + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY)); +#else + result = (pg_time_t) (t + + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY)); +#endif + return result; +} - CleanupXLogPageRead(xlogreader); - XLogReaderFree(xlogreader); +static const char *wal_archivedir = NULL; +static uint32 wal_seg_size = 0; +/* + * If true a wal reader thread switches to the next segment using + * segno_next. + */ +static bool wal_manual_switch = false; +/* + * If true a wal reader thread waits for other threads if the thread met absent + * wal segment. + */ +static bool wal_consistent_read = false; - /* Extracting is successful */ - extract_arg->ret = 0; - return NULL; -} +/* + * Variables used within validate_wal() and validateXLogRecord() to stop workers + */ +static time_t wal_target_time = 0; +static TransactionId wal_target_xid = InvalidTransactionId; +static XLogRecPtr wal_target_lsn = InvalidXLogRecPtr; /* * Read WAL from the archive directory, from 'startpoint' to 'endpoint' on the * given timeline. Collect data blocks touched by the WAL records into a page map. * - * If **prev_segno** is true then read all segments up to **endpoint** segment - * minus one. Else read all segments up to **endpoint** segment. - * - * Pagemap extracting is processed using threads. Eeach thread reads single WAL + * Pagemap extracting is processed using threads. Each thread reads single WAL * file. 
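/*
 * Illustrative sketch, not part of the patch. timestamptz_to_time_t() above
 * shifts a PostgreSQL timestamp (counted from 2000-01-01) onto the Unix epoch
 * (1970-01-01). The shift is a fixed constant: POSTGRES_EPOCH_JDATE minus
 * UNIX_EPOCH_JDATE is 10957 days, i.e. 946684800 seconds. A standalone
 * version of the 64-bit-microseconds branch:
 */
#include <stdint.h>
#include <time.h>

#define USECS_PER_SEC       1000000
#define PG_UNIX_EPOCH_DIFF  INT64_C(946684800)  /* 10957 days * 86400 s */

static time_t
pg_timestamp_to_unix(int64_t pg_usecs)
{
    return (time_t) (pg_usecs / USECS_PER_SEC + PG_UNIX_EPOCH_DIFF);
}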
*/ -void -extractPageMap(const char *archivedir, XLogRecPtr startpoint, TimeLineID tli, - XLogRecPtr endpoint, bool prev_seg, parray *files) +bool +extractPageMap(const char *archivedir, uint32 wal_seg_size, + XLogRecPtr startpoint, TimeLineID start_tli, + XLogRecPtr endpoint, TimeLineID end_tli, + parray *tli_list) { - int i; - int threads_need = 0; - XLogSegNo endSegNo; - bool extract_isok = true; - pthread_t *threads; - xlog_thread_arg *thread_args; - time_t start_time, - end_time; + bool extract_isok = false; + + if (start_tli == end_tli) + /* easy case */ + extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId, + InvalidXLogRecPtr, end_tli, wal_seg_size, + startpoint, endpoint, false, extractPageInfo, + NULL, true); + else + { + /* We have to process WAL located on several different xlog intervals, + * located on different timelines. + * + * Consider this example: + * t3 C-----X + * / + * t1 -A----*-------> + * + * A - prev backup START_LSN + * B - switchpoint for t2, available as t2->switchpoint + * C - switch for t3, available as t3->switchpoint + * X - current backup START_LSN + * + * Intervals to be parsed: + * - [A,B) on t1 + * - [B,C) on t2 + * - [C,X] on t3 + */ + int i; + parray *interval_list = parray_new(); + timelineInfo *end_tlinfo = NULL; + timelineInfo *tmp_tlinfo = NULL; + XLogRecPtr prev_switchpoint = InvalidXLogRecPtr; + + /* We must find TLI information about final timeline (t3 in example) */ + for (i = 0; i < parray_num(tli_list); i++) + { + tmp_tlinfo = parray_get(tli_list, i); - elog(LOG, "Compiling pagemap"); - if (!XRecOffIsValid(startpoint)) - elog(ERROR, "Invalid startpoint value %X/%X", - (uint32) (startpoint >> 32), (uint32) (startpoint)); + if (tmp_tlinfo->tli == end_tli) + { + end_tlinfo = tmp_tlinfo; + break; + } + } - if (!XRecOffIsValid(endpoint)) - elog(ERROR, "Invalid endpoint value %X/%X", - (uint32) (endpoint >> 32), (uint32) (endpoint)); + /* Iterate over timelines backward, + * starting with end_tli and ending with start_tli. + * For every timeline calculate LSN-interval that must be parsed. + */ - XLByteToSeg(endpoint, endSegNo); - if (prev_seg) - endSegNo--; + tmp_tlinfo = end_tlinfo; + while (tmp_tlinfo) + { + lsnInterval *wal_interval = pgut_malloc(sizeof(lsnInterval)); + wal_interval->tli = tmp_tlinfo->tli; - nextSegNoToRead = 0; - time(&start_time); + if (tmp_tlinfo->tli == end_tli) + { + wal_interval->begin_lsn = tmp_tlinfo->switchpoint; + wal_interval->end_lsn = endpoint; + } + else if (tmp_tlinfo->tli == start_tli) + { + wal_interval->begin_lsn = startpoint; + wal_interval->end_lsn = prev_switchpoint; + } + else + { + wal_interval->begin_lsn = tmp_tlinfo->switchpoint; + wal_interval->end_lsn = prev_switchpoint; + } - threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); - thread_args = (xlog_thread_arg *) palloc(sizeof(xlog_thread_arg)*num_threads); + parray_append(interval_list, wal_interval); - /* - * Initialize thread args. - * - * Each thread works with its own WAL segment and we need to adjust - * startpoint value for each thread. 
- */ - for (i = 0; i < num_threads; i++) - { - InitXLogPageRead(&thread_args[i].private_data, archivedir, tli, false); - thread_args[i].thread_num = i; + if (tmp_tlinfo->tli == start_tli) + break; - thread_args[i].startpoint = startpoint; - thread_args[i].endpoint = endpoint; - thread_args[i].endSegNo = endSegNo; - /* By default there is some error */ - thread_args[i].ret = 1; + prev_switchpoint = tmp_tlinfo->switchpoint; + tmp_tlinfo = tmp_tlinfo->parent_link; + } - /* Adjust startpoint to the next thread */ - if (nextSegNoToRead == 0) - XLByteToSeg(startpoint, nextSegNoToRead); + for (i = parray_num(interval_list) - 1; i >= 0; i--) + { + bool inclusive_endpoint; + lsnInterval *tmp_interval = (lsnInterval *) parray_get(interval_list, i); - nextSegNoToRead++; - /* - * If we need to read less WAL segments than num_threads, create less - * threads. - */ - if (nextSegNoToRead > endSegNo) - break; - XLogSegNoOffsetToRecPtr(nextSegNoToRead, 0, startpoint); - /* Skip over the page header */ - startpoint += SizeOfXLogLongPHD; + /* In case of replica promotion, endpoints of intermediate + * timelines can be unreachable. + */ + inclusive_endpoint = false; - threads_need++; - } + /* ... but not the end timeline */ + if (tmp_interval->tli == end_tli) + inclusive_endpoint = true; - /* Run threads */ - for (i = 0; i < threads_need; i++) - { - elog(VERBOSE, "Start WAL reader thread: %d", i); - pthread_create(&threads[i], NULL, doExtractPageMap, &thread_args[i]); - } + extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId, + InvalidXLogRecPtr, tmp_interval->tli, wal_seg_size, + tmp_interval->begin_lsn, tmp_interval->end_lsn, + false, extractPageInfo, NULL, inclusive_endpoint); + if (!extract_isok) + break; - /* Wait for threads */ - for (i = 0; i < threads_need; i++) - { - pthread_join(threads[i], NULL); - if (thread_args[i].ret == 1) - extract_isok = false; + pg_free(tmp_interval); + } + pg_free(interval_list); } - pfree(threads); - pfree(thread_args); - - time(&end_time); - if (extract_isok) - elog(LOG, "Pagemap compiled, time elapsed %.0f sec", - difftime(end_time, start_time)); - else - elog(ERROR, "Pagemap compiling failed"); + return extract_isok; } /* - * Ensure that the backup has all wal files needed for recovery to consistent state. + * Ensure that the backup has all wal files needed for recovery to consistent + * state. + * + * WAL records reading is processed using threads. Each thread reads single WAL + * file. 
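/*
 * Illustrative sketch, not part of the patch. The interval construction above
 * walks the timeline history backwards from the backup's end timeline to its
 * start timeline: the newest timeline gets [its switchpoint, endpoint], the
 * oldest gets [startpoint, next switchpoint), and intermediate timelines get
 * [own switchpoint, next switchpoint). A simplified sketch with stand-in
 * types (tli_info/lsn_interval are not the project's structs):
 */
typedef unsigned long long lsn_t;

typedef struct tli_info
{
    int             tli;
    lsn_t           switchpoint;    /* where this timeline branched off */
    struct tli_info *parent;        /* older timeline, NULL for the oldest */
} tli_info;

typedef struct
{
    int     tli;
    lsn_t   begin_lsn;
    lsn_t   end_lsn;
} lsn_interval;

/* Fill 'out' from newest to oldest timeline; returns the interval count. */
static int
build_intervals(const tli_info *end_tlinfo, int start_tli,
                lsn_t startpoint, lsn_t endpoint,
                lsn_interval *out, int max_out)
{
    const tli_info *cur = end_tlinfo;
    lsn_t           prev_switchpoint = 0;
    int             n = 0;

    while (cur && n < max_out)
    {
        out[n].tli = cur->tli;
        out[n].begin_lsn = (cur->tli == start_tli) ? startpoint : cur->switchpoint;
        out[n].end_lsn = (cur == end_tlinfo) ? endpoint : prev_switchpoint;
        n++;

        if (cur->tli == start_tli)
            break;

        prev_switchpoint = cur->switchpoint;
        cur = cur->parent;
    }
    return n;   /* WAL is then parsed oldest-first, i.e. out[n-1] .. out[0] */
}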
*/ static void validate_backup_wal_from_start_to_stop(pgBackup *backup, - char *backup_xlog_path, TimeLineID tli) + const char *archivedir, TimeLineID tli, + uint32 xlog_seg_size) { - XLogRecPtr startpoint = backup->start_lsn; - XLogRecord *record; - XLogReaderState *xlogreader; - char *errormsg; - XLogPageReadPrivate private; - bool got_endpoint = false; - - xlogreader = InitXLogPageRead(&private, backup_xlog_path, tli, true); - - while (true) - { - record = XLogReadRecord(xlogreader, startpoint, &errormsg); - - if (record == NULL) - { - if (errormsg) - elog(WARNING, "%s", errormsg); - - break; - } + bool got_endpoint; - /* Got WAL record at stop_lsn */ - if (xlogreader->ReadRecPtr == backup->stop_lsn) - { - got_endpoint = true; - break; - } - startpoint = InvalidXLogRecPtr; /* continue reading at next record */ - } + got_endpoint = RunXLogThreads(archivedir, 0, InvalidTransactionId, + InvalidXLogRecPtr, tli, xlog_seg_size, + backup->start_lsn, backup->stop_lsn, + false, NULL, NULL, true); if (!got_endpoint) { - PrintXLogCorruptionMsg(&private, WARNING); - /* * If we don't have WAL between start_lsn and stop_lsn, * the backup is definitely corrupted. Update its status. */ - backup->status = BACKUP_STATUS_CORRUPT; - pgBackupWriteBackupControlFile(backup); + write_backup_status(backup, BACKUP_STATUS_CORRUPT, instance_name, true); elog(WARNING, "There are not enough WAL records to consistenly restore " "backup %s from START LSN: %X/%X to STOP LSN: %X/%X", @@ -405,10 +389,6 @@ validate_backup_wal_from_start_to_stop(pgBackup *backup, (uint32) (backup->stop_lsn >> 32), (uint32) (backup->stop_lsn)); } - - /* clean */ - CleanupXLogPageRead(xlogreader); - XLogReaderFree(xlogreader); } /* @@ -417,25 +397,15 @@ validate_backup_wal_from_start_to_stop(pgBackup *backup, * up to the given recovery target. 
*/ void -validate_wal(pgBackup *backup, - const char *archivedir, - time_t target_time, - TransactionId target_xid, - XLogRecPtr target_lsn, - TimeLineID tli) +validate_wal(pgBackup *backup, const char *archivedir, + time_t target_time, TransactionId target_xid, + XLogRecPtr target_lsn, TimeLineID tli, uint32 wal_seg_size) { - XLogRecPtr startpoint = backup->start_lsn; const char *backup_id; - XLogRecord *record; - XLogReaderState *xlogreader; - char *errormsg; - XLogPageReadPrivate private; - TransactionId last_xid = InvalidTransactionId; - TimestampTz last_time = 0; + XLogRecTarget last_rec; char last_timestamp[100], target_timestamp[100]; bool all_wal = false; - char backup_xlog_path[MAXPGPATH]; /* We need free() this later */ backup_id = base36enc(backup->start_time); @@ -456,13 +426,18 @@ validate_wal(pgBackup *backup, */ if (backup->stream) { - snprintf(backup_xlog_path, sizeof(backup_xlog_path), "/%s/%s/%s/%s", - backup_instance_path, backup_id, DATABASE_DIR, PG_XLOG_DIR); + char backup_database_dir[MAXPGPATH]; + char backup_xlog_path[MAXPGPATH]; + + join_path_components(backup_database_dir, backup->root_dir, DATABASE_DIR); + join_path_components(backup_xlog_path, backup_database_dir, PG_XLOG_DIR); - validate_backup_wal_from_start_to_stop(backup, backup_xlog_path, tli); + validate_backup_wal_from_start_to_stop(backup, backup_xlog_path, tli, + wal_seg_size); } else - validate_backup_wal_from_start_to_stop(backup, (char *) archivedir, tli); + validate_backup_wal_from_start_to_stop(backup, (char *) archivedir, tli, + wal_seg_size); if (backup->status == BACKUP_STATUS_CORRUPT) { @@ -473,7 +448,8 @@ validate_wal(pgBackup *backup, * If recovery target is provided check that we can restore backup to a * recovery target time or xid. */ - if (!TransactionIdIsValid(target_xid) && target_time == 0 && !XRecOffIsValid(target_lsn)) + if (!TransactionIdIsValid(target_xid) && target_time == 0 && + !XRecOffIsValid(target_lsn)) { /* Recovery target is not given so exit */ elog(INFO, "Backup %s WAL segments are valid", backup_id); @@ -484,7 +460,7 @@ validate_wal(pgBackup *backup, * If recovery target is provided, ensure that archive files exist in * archive directory. */ - if (dir_is_empty(archivedir)) + if (dir_is_empty(archivedir, FIO_LOCAL_HOST)) elog(ERROR, "WAL archive is empty. You cannot restore backup to a recovery target without WAL archive."); /* @@ -492,94 +468,54 @@ validate_wal(pgBackup *backup, * up to the given recovery target. * In any case we cannot restore to the point before stop_lsn. 
*/ - xlogreader = InitXLogPageRead(&private, archivedir, tli, true); /* We can restore at least up to the backup end */ + last_rec.rec_time = 0; + last_rec.rec_xid = backup->recovery_xid; + last_rec.rec_lsn = backup->stop_lsn; + time2iso(last_timestamp, lengthof(last_timestamp), backup->recovery_time); - last_xid = backup->recovery_xid; - if ((TransactionIdIsValid(target_xid) && target_xid == last_xid) + if ((TransactionIdIsValid(target_xid) && target_xid == last_rec.rec_xid) || (target_time != 0 && backup->recovery_time >= target_time) - || (XRecOffIsValid(target_lsn) && backup->stop_lsn >= target_lsn)) + || (XRecOffIsValid(target_lsn) && last_rec.rec_lsn >= target_lsn)) all_wal = true; - startpoint = backup->stop_lsn; - while (true) - { - bool timestamp_record; - - record = XLogReadRecord(xlogreader, startpoint, &errormsg); - if (record == NULL) - { - if (errormsg) - elog(WARNING, "%s", errormsg); - - break; - } - - timestamp_record = getRecordTimestamp(xlogreader, &last_time); - if (XLogRecGetXid(xlogreader) != InvalidTransactionId) - last_xid = XLogRecGetXid(xlogreader); - - /* Check target xid */ - if (TransactionIdIsValid(target_xid) && target_xid == last_xid) - { - all_wal = true; - break; - } - /* Check target time */ - else if (target_time != 0 && timestamp_record && timestamptz_to_time_t(last_time) >= target_time) - { - all_wal = true; - break; - } - /* If there are no target xid and target time */ - else if (!TransactionIdIsValid(target_xid) && target_time == 0 && - xlogreader->ReadRecPtr == backup->stop_lsn) - { - all_wal = true; - /* We don't stop here. We want to get last_xid and last_time */ - } - - startpoint = InvalidXLogRecPtr; /* continue reading at next record */ - } - - if (last_time > 0) + all_wal = all_wal || + RunXLogThreads(archivedir, target_time, target_xid, target_lsn, + tli, wal_seg_size, backup->stop_lsn, + InvalidXLogRecPtr, true, validateXLogRecord, &last_rec, true); + if (last_rec.rec_time > 0) time2iso(last_timestamp, lengthof(last_timestamp), - timestamptz_to_time_t(last_time)); + timestamptz_to_time_t(last_rec.rec_time)); /* There are all needed WAL records */ if (all_wal) - elog(INFO, "backup validation completed successfully on time %s and xid " XID_FMT, - last_timestamp, last_xid); + elog(INFO, "Backup validation completed successfully on time %s, xid " XID_FMT " and LSN %X/%X", + last_timestamp, last_rec.rec_xid, + (uint32) (last_rec.rec_lsn >> 32), (uint32) last_rec.rec_lsn); /* Some needed WAL records are absent */ else { - PrintXLogCorruptionMsg(&private, WARNING); - - elog(WARNING, "recovery can be done up to time %s and xid " XID_FMT, - last_timestamp, last_xid); + elog(WARNING, "Recovery can be done up to time %s, xid " XID_FMT " and LSN %X/%X", + last_timestamp, last_rec.rec_xid, + (uint32) (last_rec.rec_lsn >> 32), (uint32) last_rec.rec_lsn); if (target_time > 0) - time2iso(target_timestamp, lengthof(target_timestamp), - target_time); + time2iso(target_timestamp, lengthof(target_timestamp), target_time); if (TransactionIdIsValid(target_xid) && target_time != 0) - elog(ERROR, "not enough WAL records to time %s and xid " XID_FMT, + elog(ERROR, "Not enough WAL records to time %s and xid " XID_FMT, target_timestamp, target_xid); else if (TransactionIdIsValid(target_xid)) - elog(ERROR, "not enough WAL records to xid " XID_FMT, + elog(ERROR, "Not enough WAL records to xid " XID_FMT, target_xid); else if (target_time != 0) - elog(ERROR, "not enough WAL records to time %s", + elog(ERROR, "Not enough WAL records to time %s", target_timestamp); else if 
(XRecOffIsValid(target_lsn)) - elog(ERROR, "not enough WAL records to lsn %X/%X", + elog(ERROR, "Not enough WAL records to lsn %X/%X", (uint32) (target_lsn >> 32), (uint32) (target_lsn)); } - - /* clean */ - CleanupXLogPageRead(xlogreader); - XLogReaderFree(xlogreader); } /* @@ -588,13 +524,13 @@ validate_wal(pgBackup *backup, * pg_stop_backup(). */ bool -read_recovery_info(const char *archivedir, TimeLineID tli, +read_recovery_info(const char *archivedir, TimeLineID tli, uint32 wal_seg_size, XLogRecPtr start_lsn, XLogRecPtr stop_lsn, - time_t *recovery_time, TransactionId *recovery_xid) + time_t *recovery_time) { XLogRecPtr startpoint = stop_lsn; XLogReaderState *xlogreader; - XLogPageReadPrivate private; + XLogReaderData reader_data; bool res; if (!XRecOffIsValid(start_lsn)) @@ -605,7 +541,8 @@ read_recovery_info(const char *archivedir, TimeLineID tli, elog(ERROR, "Invalid stop_lsn value %X/%X", (uint32) (stop_lsn >> 32), (uint32) (stop_lsn)); - xlogreader = InitXLogPageRead(&private, archivedir, tli, true); + xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size, + false, true, true); /* Read records from stop_lsn down to start_lsn */ do @@ -622,11 +559,11 @@ read_recovery_info(const char *archivedir, TimeLineID tli, errptr = startpoint ? startpoint : xlogreader->EndRecPtr; if (errormsg) - elog(ERROR, "could not read WAL record at %X/%X: %s", + elog(ERROR, "Could not read WAL record at %X/%X: %s", (uint32) (errptr >> 32), (uint32) (errptr), errormsg); else - elog(ERROR, "could not read WAL record at %X/%X", + elog(ERROR, "Could not read WAL record at %X/%X", (uint32) (errptr >> 32), (uint32) (errptr)); } @@ -636,7 +573,6 @@ read_recovery_info(const char *archivedir, TimeLineID tli, if (getRecordTimestamp(xlogreader, &last_time)) { *recovery_time = timestamptz_to_time_t(last_time); - *recovery_xid = XLogRecGetXid(xlogreader); /* Found timestamp in WAL record 'record' */ res = true; @@ -660,10 +596,10 @@ read_recovery_info(const char *archivedir, TimeLineID tli, */ bool wal_contains_lsn(const char *archivedir, XLogRecPtr target_lsn, - TimeLineID target_tli) + TimeLineID target_tli, uint32 wal_seg_size) { XLogReaderState *xlogreader; - XLogPageReadPrivate private; + XLogReaderData reader_data; char *errormsg; bool res; @@ -671,165 +607,484 @@ wal_contains_lsn(const char *archivedir, XLogRecPtr target_lsn, elog(ERROR, "Invalid target_lsn value %X/%X", (uint32) (target_lsn >> 32), (uint32) (target_lsn)); - xlogreader = InitXLogPageRead(&private, archivedir, target_tli, true); + xlogreader = InitXLogPageRead(&reader_data, archivedir, target_tli, + wal_seg_size, false, false, true); + + if (xlogreader == NULL) + elog(ERROR, "Out of memory"); + + xlogreader->system_identifier = instance_config.system_identifier; res = XLogReadRecord(xlogreader, target_lsn, &errormsg) != NULL; /* Didn't find 'target_lsn' and there is no error, return false */ + if (errormsg) + elog(WARNING, "Could not read WAL record at %X/%X: %s", + (uint32) (target_lsn >> 32), (uint32) (target_lsn), errormsg); + CleanupXLogPageRead(xlogreader); XLogReaderFree(xlogreader); return res; } -#ifdef HAVE_LIBZ /* - * Show error during work with compressed file + * Get LSN of a first record within the WAL segment with number 'segno'. 
*/ -static const char * -get_gz_error(gzFile gzf) +XLogRecPtr +get_first_record_lsn(const char *archivedir, XLogSegNo segno, + TimeLineID tli, uint32 wal_seg_size, int timeout) { - int errnum; - const char *errmsg; + XLogReaderState *xlogreader; + XLogReaderData reader_data; + XLogRecPtr record = InvalidXLogRecPtr; + XLogRecPtr startpoint; + char wal_segment[MAXFNAMELEN]; + int attempts = 0; - errmsg = gzerror(gzf, &errnum); - if (errnum == Z_ERRNO) - return strerror(errno); - else - return errmsg; -} -#endif + if (segno <= 1) + elog(ERROR, "Invalid WAL segment number " UINT64_FORMAT, segno); -/* XLogreader callback function, to read a WAL page */ -static int -SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, - int reqLen, XLogRecPtr targetRecPtr, char *readBuf, - TimeLineID *pageTLI) -{ - XLogPageReadPrivate *private_data; - uint32 targetPageOff; + GetXLogFileName(wal_segment, tli, segno, instance_config.xlog_seg_size); - private_data = (XLogPageReadPrivate *) xlogreader->private_data; - targetPageOff = targetPagePtr % XLogSegSize; + xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size, + false, false, true); + if (xlogreader == NULL) + elog(ERROR, "Out of memory"); + xlogreader->system_identifier = instance_config.system_identifier; - /* - * See if we need to switch to a new segment because the requested record - * is not in the currently open one. - */ - if (!XLByteInSeg(targetPagePtr, private_data->xlogsegno)) + /* Set startpoint to 0 in segno */ + GetXLogRecPtr(segno, 0, wal_seg_size, startpoint); + + while (attempts <= timeout) { - CleanupXLogPageRead(xlogreader); - /* - * Do not switch to next WAL segment in this function. Currently it is - * manually switched only in doExtractPageMap(). - */ - if (private_data->manual_switch) + record = XLogFindNextRecord(xlogreader, startpoint); + + if (XLogRecPtrIsInvalid(record)) + record = InvalidXLogRecPtr; + else { - private_data->need_switch = true; - return -1; + elog(LOG, "First record in WAL segment \"%s\": %X/%X", wal_segment, + (uint32) (record >> 32), (uint32) (record)); + break; } + + attempts++; + sleep(1); } - XLByteToSeg(targetPagePtr, private_data->xlogsegno); + /* cleanup */ + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); - /* Try to switch to the next WAL segment */ - if (!private_data->xlogexists) - { - char xlogfname[MAXFNAMELEN]; + return record; +} - XLogFileName(xlogfname, private_data->tli, private_data->xlogsegno); - snprintf(private_data->xlogpath, MAXPGPATH, "%s/%s", - private_data->archivedir, xlogfname); - if (fileExists(private_data->xlogpath)) - { - elog(LOG, "Opening WAL segment \"%s\"", private_data->xlogpath); +/* + * Get LSN of the record next after target lsn. 
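/*
 * Illustrative sketch, not part of the patch. get_first_record_lsn() above
 * polls for the segment's first record once per second until 'timeout'
 * seconds have passed, so the caller can wait out a segment that is still
 * being archived. The shape of that loop, detached from the WAL specifics
 * (try_read is a hypothetical callback):
 */
#include <stdbool.h>
#include <unistd.h>

static bool
poll_with_timeout(bool (*try_read) (void *), void *arg, int timeout_sec)
{
    int     attempts = 0;

    while (attempts <= timeout_sec)
    {
        if (try_read(arg))
            return true;    /* got a usable result */

        attempts++;
        sleep(1);           /* the segment may appear shortly, retry */
    }
    return false;           /* give up after roughly timeout_sec seconds */
}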
+ */ +XLogRecPtr +get_next_record_lsn(const char *archivedir, XLogSegNo segno, + TimeLineID tli, uint32 wal_seg_size, int timeout, + XLogRecPtr target) +{ + XLogReaderState *xlogreader; + XLogReaderData reader_data; + XLogRecPtr startpoint, found; + XLogRecPtr res = InvalidXLogRecPtr; + char wal_segment[MAXFNAMELEN]; + int attempts = 0; + + if (segno <= 1) + elog(ERROR, "Invalid WAL segment number " UINT64_FORMAT, segno); + + GetXLogFileName(wal_segment, tli, segno, instance_config.xlog_seg_size); + + xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size, + false, false, true); + if (xlogreader == NULL) + elog(ERROR, "Out of memory"); + xlogreader->system_identifier = instance_config.system_identifier; + + /* Set startpoint to 0 in segno */ + GetXLogRecPtr(segno, 0, wal_seg_size, startpoint); + + found = XLogFindNextRecord(xlogreader, startpoint); + + if (XLogRecPtrIsInvalid(found)) + { + if (xlogreader->errormsg_buf[0] != '\0') + elog(WARNING, "Could not read WAL record at %X/%X: %s", + (uint32) (startpoint >> 32), (uint32) (startpoint), + xlogreader->errormsg_buf); + else + elog(WARNING, "Could not read WAL record at %X/%X", + (uint32) (startpoint >> 32), (uint32) (startpoint)); + PrintXLogCorruptionMsg(&reader_data, ERROR); + } + startpoint = found; + + while (attempts <= timeout) + { + XLogRecord *record; + char *errormsg; + + if (interrupted) + elog(ERROR, "Interrupted during WAL reading"); + + record = XLogReadRecord(xlogreader, startpoint, &errormsg); + + if (record == NULL) + { + XLogRecPtr errptr; + + errptr = XLogRecPtrIsInvalid(startpoint) ? xlogreader->EndRecPtr : + startpoint; + + if (errormsg) + elog(WARNING, "Could not read WAL record at %X/%X: %s", + (uint32) (errptr >> 32), (uint32) (errptr), + errormsg); + else + elog(WARNING, "Could not read WAL record at %X/%X", + (uint32) (errptr >> 32), (uint32) (errptr)); + PrintXLogCorruptionMsg(&reader_data, ERROR); + } + + if (xlogreader->ReadRecPtr >= target) + { + elog(LOG, "Record %X/%X is next after target LSN %X/%X", + (uint32) (xlogreader->ReadRecPtr >> 32), (uint32) (xlogreader->ReadRecPtr), + (uint32) (target >> 32), (uint32) (target)); + res = xlogreader->ReadRecPtr; + break; + } + else + startpoint = InvalidXLogRecPtr; + } + + /* cleanup */ + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); + + return res; +} + + +/* + * Get LSN of a record prior to target_lsn. + * If 'start_lsn' is in the segment with number 'segno' then start from 'start_lsn', + * otherwise start from offset 0 within the segment. + * + * Returns LSN of a record which EndRecPtr is greater or equal to target_lsn. + * If 'seek_prev_segment' is true, then look for prior record in prior WAL segment. + * + * it's unclear that "last" in "last_wal_lsn" refers to the + * "closest to stop_lsn backward or forward, depending on seek_prev_segment setting". 
+ */ +XLogRecPtr +get_prior_record_lsn(const char *archivedir, XLogRecPtr start_lsn, + XLogRecPtr stop_lsn, TimeLineID tli, bool seek_prev_segment, + uint32 wal_seg_size) +{ + XLogReaderState *xlogreader; + XLogReaderData reader_data; + XLogRecPtr startpoint; + XLogSegNo start_segno; + XLogSegNo segno; + XLogRecPtr res = InvalidXLogRecPtr; + + GetXLogSegNo(stop_lsn, segno, wal_seg_size); + + if (segno <= 1) + elog(ERROR, "Invalid WAL segment number " UINT64_FORMAT, segno); + + if (seek_prev_segment) + segno = segno - 1; + + xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size, + false, false, true); + + if (xlogreader == NULL) + elog(ERROR, "Out of memory"); + + xlogreader->system_identifier = instance_config.system_identifier; + + /* + * Calculate startpoint. Decide: we should use 'start_lsn' or offset 0. + */ + GetXLogSegNo(start_lsn, start_segno, wal_seg_size); + if (start_segno == segno) + startpoint = start_lsn; + else + { + XLogRecPtr found; + + GetXLogRecPtr(segno, 0, wal_seg_size, startpoint); + found = XLogFindNextRecord(xlogreader, startpoint); + + if (XLogRecPtrIsInvalid(found)) + { + if (xlogreader->errormsg_buf[0] != '\0') + elog(WARNING, "Could not read WAL record at %X/%X: %s", + (uint32) (startpoint >> 32), (uint32) (startpoint), + xlogreader->errormsg_buf); + else + elog(WARNING, "Could not read WAL record at %X/%X", + (uint32) (startpoint >> 32), (uint32) (startpoint)); + PrintXLogCorruptionMsg(&reader_data, ERROR); + } + startpoint = found; + } + + while (true) + { + XLogRecord *record; + char *errormsg; + + if (interrupted) + elog(ERROR, "Interrupted during WAL reading"); + + record = XLogReadRecord(xlogreader, startpoint, &errormsg); + if (record == NULL) + { + XLogRecPtr errptr; + + errptr = XLogRecPtrIsInvalid(startpoint) ? 
xlogreader->EndRecPtr : + startpoint; + + if (errormsg) + elog(WARNING, "Could not read WAL record at %X/%X: %s", + (uint32) (errptr >> 32), (uint32) (errptr), + errormsg); + else + elog(WARNING, "Could not read WAL record at %X/%X", + (uint32) (errptr >> 32), (uint32) (errptr)); + PrintXLogCorruptionMsg(&reader_data, ERROR); + } + + if (xlogreader->EndRecPtr >= stop_lsn) + { + elog(LOG, "Record %X/%X has endpoint %X/%X which is equal or greater than requested LSN %X/%X", + (uint32) (xlogreader->ReadRecPtr >> 32), (uint32) (xlogreader->ReadRecPtr), + (uint32) (xlogreader->EndRecPtr >> 32), (uint32) (xlogreader->EndRecPtr), + (uint32) (stop_lsn >> 32), (uint32) (stop_lsn)); + res = xlogreader->ReadRecPtr; + break; + } + + /* continue reading at next record */ + startpoint = InvalidXLogRecPtr; + } + + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); + + return res; +} + +#ifdef HAVE_LIBZ +/* + * Show error during work with compressed file + */ +static const char * +get_gz_error(gzFile gzf) +{ + int errnum; + const char *errmsg; + + errmsg = fio_gzerror(gzf, &errnum); + if (errnum == Z_ERRNO) + return strerror(errno); + else + return errmsg; +} +#endif + +/* XLogreader callback function, to read a WAL page */ +static int +SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, char *readBuf, + TimeLineID *pageTLI) +{ + XLogReaderData *reader_data; + uint32 targetPageOff; + + reader_data = (XLogReaderData *) xlogreader->private_data; + targetPageOff = targetPagePtr % wal_seg_size; + + if (interrupted || thread_interrupted) + elog(ERROR, "Thread [%d]: Interrupted during WAL reading", + reader_data->thread_num); + + /* + * See if we need to switch to a new segment because the requested record + * is not in the currently open one. + */ + if (!IsInXLogSeg(targetPagePtr, reader_data->xlogsegno, wal_seg_size)) + { + elog(VERBOSE, "Thread [%d]: Need to switch to the next WAL segment, page LSN %X/%X, record being read LSN %X/%X", + reader_data->thread_num, + (uint32) (targetPagePtr >> 32), (uint32) (targetPagePtr), + (uint32) (xlogreader->currRecPtr >> 32), + (uint32) (xlogreader->currRecPtr )); + + /* + * If the last record on the page is not complete, + * we must continue reading pages in the same thread + */ + if (!XLogRecPtrIsInvalid(xlogreader->currRecPtr) && + xlogreader->currRecPtr < targetPagePtr) + { + CleanupXLogPageRead(xlogreader); + + /* + * Switch to the next WAL segment after reading contrecord. + */ + if (wal_manual_switch) + reader_data->need_switch = true; + } + else + { + CleanupXLogPageRead(xlogreader); + /* + * Do not switch to next WAL segment in this function. It is + * manually switched by a thread routine. 
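The get_gz_error() helper above exists because zlib reports system-level failures as Z_ERRNO, in which case the real message lives in errno rather than in zlib's own error string. A small standalone sketch of the same pattern against plain zlib rather than the patch's fio_gz* wrappers (the file name and block size are illustrative assumptions):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define BLCKSZ 8192				/* assumed WAL block size */

static const char *
gz_errmsg(gzFile gzf)
{
	int			errnum;
	const char *msg = gzerror(gzf, &errnum);

	/* Z_ERRNO means "look at errno", the same distinction get_gz_error() makes */
	return (errnum == Z_ERRNO) ? strerror(errno) : msg;
}

int
main(void)
{
	char		buf[BLCKSZ];
	gzFile		gzf = gzopen("000000010000000000000042.gz", "rb");	/* illustrative path */

	if (gzf == NULL)
	{
		fprintf(stderr, "could not open segment: %s\n", strerror(errno));
		return 1;
	}

	if (gzseek(gzf, (z_off_t) BLCKSZ, SEEK_SET) == -1 ||
		gzread(gzf, buf, BLCKSZ) != BLCKSZ)
		fprintf(stderr, "read error: %s\n", gz_errmsg(gzf));

	gzclose(gzf);
	return 0;
}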
+ */ + if (wal_manual_switch) + { + reader_data->need_switch = true; + return -1; + } + } + } + + GetXLogSegNo(targetPagePtr, reader_data->xlogsegno, wal_seg_size); + + /* Try to switch to the next WAL segment */ + if (!reader_data->xlogexists) + { + char xlogfname[MAXFNAMELEN]; + char partial_file[MAXPGPATH]; + + GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno, wal_seg_size); + + snprintf(reader_data->xlogpath, MAXPGPATH, "%s/%s", wal_archivedir, xlogfname); + snprintf(reader_data->gz_xlogpath, MAXPGPATH, "%s.gz", reader_data->xlogpath); - private_data->xlogexists = true; - private_data->xlogfile = open(private_data->xlogpath, - O_RDONLY | PG_BINARY, 0); + /* We fall back to using .partial segment in case if we are running + * multi-timeline incremental backup right after standby promotion. + * TODO: it should be explicitly enabled. + */ + snprintf(partial_file, MAXPGPATH, "%s.partial", reader_data->xlogpath); + + /* If segment do not exists, but the same + * segment with '.partial' suffix does, use it instead */ + if (!fileExists(reader_data->xlogpath, FIO_LOCAL_HOST) && + fileExists(partial_file, FIO_LOCAL_HOST)) + { + snprintf(reader_data->xlogpath, MAXPGPATH, "%s", partial_file); + } + + if (fileExists(reader_data->xlogpath, FIO_LOCAL_HOST)) + { + elog(LOG, "Thread [%d]: Opening WAL segment \"%s\"", + reader_data->thread_num, reader_data->xlogpath); + + reader_data->xlogexists = true; + reader_data->xlogfile = fio_open(reader_data->xlogpath, + O_RDONLY | PG_BINARY, FIO_LOCAL_HOST); - if (private_data->xlogfile < 0) + if (reader_data->xlogfile < 0) { - elog(WARNING, "Could not open WAL segment \"%s\": %s", - private_data->xlogpath, strerror(errno)); + elog(WARNING, "Thread [%d]: Could not open WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->xlogpath, + strerror(errno)); return -1; } } #ifdef HAVE_LIBZ /* Try to open compressed WAL segment */ - else + else if (fileExists(reader_data->gz_xlogpath, FIO_LOCAL_HOST)) { - snprintf(private_data->gz_xlogpath, - sizeof(private_data->gz_xlogpath), "%s.gz", - private_data->xlogpath); - if (fileExists(private_data->gz_xlogpath)) - { - elog(LOG, "Opening compressed WAL segment \"%s\"", - private_data->gz_xlogpath); + elog(LOG, "Thread [%d]: Opening compressed WAL segment \"%s\"", + reader_data->thread_num, reader_data->gz_xlogpath); - private_data->xlogexists = true; - private_data->gz_xlogfile = gzopen(private_data->gz_xlogpath, - "rb"); - if (private_data->gz_xlogfile == NULL) - { - elog(WARNING, "Could not open compressed WAL segment \"%s\": %s", - private_data->gz_xlogpath, strerror(errno)); - return -1; - } + reader_data->xlogexists = true; + reader_data->gz_xlogfile = fio_gzopen(reader_data->gz_xlogpath, + "rb", -1, FIO_LOCAL_HOST); + if (reader_data->gz_xlogfile == NULL) + { + elog(WARNING, "Thread [%d]: Could not open compressed WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->gz_xlogpath, + strerror(errno)); + return -1; } } #endif - /* Exit without error if WAL segment doesn't exist */ - if (!private_data->xlogexists) + if (!reader_data->xlogexists) return -1; } /* * At this point, we have the right segment open. 
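The lookup order above is: the plain segment first, then the same name with a .partial suffix (useful right after standby promotion), and only then the gzip-compressed copy. A rough local-only sketch of that decision, using access() instead of the patch's fileExists()/fio layer (the archive directory and segment name are illustrative):

#include <stdio.h>
#include <unistd.h>

/* Try "<dir>/<seg>", "<dir>/<seg>.partial", "<dir>/<seg>.gz" in that order;
 * return 0 and fill 'out' with the first one that exists, -1 otherwise. */
static int
choose_wal_file(const char *archivedir, const char *segname, char *out, size_t outlen)
{
	static const char *suffixes[] = { "", ".partial", ".gz" };
	int			i;

	for (i = 0; i < 3; i++)
	{
		snprintf(out, outlen, "%s/%s%s", archivedir, segname, suffixes[i]);
		if (access(out, F_OK) == 0)
			return 0;
	}
	return -1;
}

int
main(void)
{
	char		path[1024];

	if (choose_wal_file("/backup/wal/node1", "000000010000000000000042",
						path, sizeof(path)) == 0)
		printf("would open \"%s\"\n", path);
	else
		printf("segment is absent\n");
	return 0;
}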
*/ - Assert(private_data->xlogexists); + Assert(reader_data->xlogexists); + + /* + * Do not read same page read earlier from the file, read it from the buffer + */ + if (reader_data->prev_page_off != 0 && + reader_data->prev_page_off == targetPageOff) + { + memcpy(readBuf, reader_data->page_buf, XLOG_BLCKSZ); + *pageTLI = reader_data->tli; + return XLOG_BLCKSZ; + } /* Read the requested page */ - if (private_data->xlogfile != -1) + if (reader_data->xlogfile != -1) { - if (lseek(private_data->xlogfile, (off_t) targetPageOff, SEEK_SET) < 0) + if (fio_seek(reader_data->xlogfile, (off_t) targetPageOff) < 0) { - elog(WARNING, "Could not seek in WAL segment \"%s\": %s", - private_data->xlogpath, strerror(errno)); + elog(WARNING, "Thread [%d]: Could not seek in WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->xlogpath, strerror(errno)); return -1; } - if (read(private_data->xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (fio_read(reader_data->xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { - elog(WARNING, "Could not read from WAL segment \"%s\": %s", - private_data->xlogpath, strerror(errno)); + elog(WARNING, "Thread [%d]: Could not read from WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->xlogpath, strerror(errno)); return -1; } } #ifdef HAVE_LIBZ else { - if (gzseek(private_data->gz_xlogfile, (z_off_t) targetPageOff, SEEK_SET) == -1) + if (fio_gzseek(reader_data->gz_xlogfile, (z_off_t) targetPageOff, SEEK_SET) == -1) { - elog(WARNING, "Could not seek in compressed WAL segment \"%s\": %s", - private_data->gz_xlogpath, - get_gz_error(private_data->gz_xlogfile)); + elog(WARNING, "Thread [%d]: Could not seek in compressed WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->gz_xlogpath, + get_gz_error(reader_data->gz_xlogfile)); return -1; } - if (gzread(private_data->gz_xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (fio_gzread(reader_data->gz_xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { - elog(WARNING, "Could not read from compressed WAL segment \"%s\": %s", - private_data->gz_xlogpath, - get_gz_error(private_data->gz_xlogfile)); + elog(WARNING, "Thread [%d]: Could not read from compressed WAL segment \"%s\": %s", + reader_data->thread_num, reader_data->gz_xlogpath, + get_gz_error(reader_data->gz_xlogfile)); return -1; } } #endif - *pageTLI = private_data->tli; + memcpy(reader_data->page_buf, readBuf, XLOG_BLCKSZ); + reader_data->prev_page_off = targetPageOff; + *pageTLI = reader_data->tli; return XLOG_BLCKSZ; } @@ -837,79 +1092,622 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, * Initialize WAL segments reading. 
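The page_buf/prev_page_off pair added above is a one-entry read cache: when the reader asks for the same page offset twice in a row, the copy kept from the previous call is returned without another seek and read. A standalone sketch of the idea over a plain file descriptor (the struct, names and path are illustrative; the patch itself reads through fio_seek()/fio_read(), and, as above, offset 0 is never treated as cached):

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

#define BLCKSZ 8192				/* assumed WAL block size */

typedef struct
{
	int			fd;
	uint32_t	prev_page_off;	/* 0 = nothing cached yet */
	char		page_buf[BLCKSZ];
} PageReader;

static int
read_page(PageReader *r, uint32_t page_off, char *out)
{
	if (r->prev_page_off != 0 && r->prev_page_off == page_off)
	{
		memcpy(out, r->page_buf, BLCKSZ);	/* served from the cache */
		return BLCKSZ;
	}

	if (pread(r->fd, out, BLCKSZ, (off_t) page_off) != BLCKSZ)
		return -1;

	memcpy(r->page_buf, out, BLCKSZ);		/* remember the last page read */
	r->prev_page_off = page_off;
	return BLCKSZ;
}

int
main(void)
{
	PageReader	r = {-1, 0, {0}};
	char		buf[BLCKSZ];

	r.fd = open("000000010000000000000042", O_RDONLY);	/* illustrative path */
	if (r.fd < 0)
		return 1;

	read_page(&r, BLCKSZ, buf);		/* hits the file */
	read_page(&r, BLCKSZ, buf);		/* same offset: served from page_buf */

	close(r.fd);
	return 0;
}

The cache pays off because several WAL records usually share one page, so the reader requests the same page repeatedly.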
*/ static XLogReaderState * -InitXLogPageRead(XLogPageReadPrivate *private_data, const char *archivedir, - TimeLineID tli, bool allocate_reader) +InitXLogPageRead(XLogReaderData *reader_data, const char *archivedir, + TimeLineID tli, uint32 segment_size, bool manual_switch, + bool consistent_read, bool allocate_reader) { XLogReaderState *xlogreader = NULL; - MemSet(private_data, 0, sizeof(XLogPageReadPrivate)); - private_data->archivedir = archivedir; - private_data->tli = tli; - private_data->xlogfile = -1; + wal_archivedir = archivedir; + wal_seg_size = segment_size; + wal_manual_switch = manual_switch; + wal_consistent_read = consistent_read; + + MemSet(reader_data, 0, sizeof(XLogReaderData)); + reader_data->tli = tli; + reader_data->xlogfile = -1; if (allocate_reader) { - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, private_data); +#if PG_VERSION_NUM >= 110000 + xlogreader = XLogReaderAllocate(wal_seg_size, &SimpleXLogPageRead, + reader_data); +#else + xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, reader_data); +#endif if (xlogreader == NULL) - elog(ERROR, "out of memory"); + elog(ERROR, "Out of memory"); + xlogreader->system_identifier = instance_config.system_identifier; } return xlogreader; } +/* + * Comparison function to sort xlog_thread_arg array. + */ +static int +xlog_thread_arg_comp(const void *a1, const void *a2) +{ + const xlog_thread_arg *arg1 = a1; + const xlog_thread_arg *arg2 = a2; + + return arg1->reader_data.xlogsegno - arg2->reader_data.xlogsegno; +} + +/* + * Run WAL processing routines using threads. Start from startpoint up to + * endpoint. It is possible to send zero endpoint, threads will read WAL + * infinitely in this case. + */ +static bool +RunXLogThreads(const char *archivedir, time_t target_time, + TransactionId target_xid, XLogRecPtr target_lsn, TimeLineID tli, + uint32 segment_size, XLogRecPtr startpoint, XLogRecPtr endpoint, + bool consistent_read, xlog_record_function process_record, + XLogRecTarget *last_rec, bool inclusive_endpoint) +{ + pthread_t *threads; + xlog_thread_arg *thread_args; + int i; + int threads_need = 0; + XLogSegNo endSegNo = 0; + bool result = true; + + if (!XRecOffIsValid(startpoint) && !XRecOffIsNull(startpoint)) + elog(ERROR, "Invalid startpoint value %X/%X", + (uint32) (startpoint >> 32), (uint32) (startpoint)); + + if (process_record) + elog(LOG, "Extracting pagemap from tli %i on range from %X/%X to %X/%X", + tli, + (uint32) (startpoint >> 32), (uint32) (startpoint), + (uint32) (endpoint >> 32), (uint32) (endpoint)); + + if (!XLogRecPtrIsInvalid(endpoint)) + { +// if (XRecOffIsNull(endpoint) && !inclusive_endpoint) + if (XRecOffIsNull(endpoint)) + { + GetXLogSegNo(endpoint, endSegNo, segment_size); + endSegNo--; + } + else if (!XRecOffIsValid(endpoint)) + { + elog(ERROR, "Invalid endpoint value %X/%X", + (uint32) (endpoint >> 32), (uint32) (endpoint)); + } + else + GetXLogSegNo(endpoint, endSegNo, segment_size); + } + + /* Initialize static variables for workers */ + wal_target_time = target_time; + wal_target_xid = target_xid; + wal_target_lsn = target_lsn; + + GetXLogSegNo(startpoint, segno_start, segment_size); + segno_target = 0; + GetXLogSegNo(startpoint, segno_next, segment_size); + segnum_read = 0; + segnum_corrupted = 0; + + threads = (pthread_t *) pgut_malloc(sizeof(pthread_t) * num_threads); + thread_args = (xlog_thread_arg *) pgut_malloc(sizeof(xlog_thread_arg) * num_threads); + + /* + * Initialize thread args. 
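xlog_thread_arg_comp() above returns the raw difference of two 64-bit segment numbers narrowed to int, which is harmless for the small ranges involved here but worth being aware of. The conventional overflow-safe form of such a comparator looks like the sketch below (the stub struct and values are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct
{
	uint64_t	xlogsegno;		/* stand-in for reader_data.xlogsegno */
} seg_stub;

/* Return -1/0/1 instead of a narrowed 64-bit difference */
static int
segno_comp(const void *a1, const void *a2)
{
	const seg_stub *s1 = a1;
	const seg_stub *s2 = a2;

	if (s1->xlogsegno < s2->xlogsegno)
		return -1;
	if (s1->xlogsegno > s2->xlogsegno)
		return 1;
	return 0;
}

int
main(void)
{
	seg_stub	args[] = {{42}, {7}, {19}};

	qsort(args, 3, sizeof(seg_stub), segno_comp);
	printf("%llu %llu %llu\n",
		   (unsigned long long) args[0].xlogsegno,
		   (unsigned long long) args[1].xlogsegno,
		   (unsigned long long) args[2].xlogsegno);
	return 0;
}

With the handful of threads used here the difference is theoretical, but the three-way form costs nothing.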
+ * + * Each thread works with its own WAL segment and we need to adjust + * startpoint value for each thread. + */ + for (i = 0; i < num_threads; i++) + { + xlog_thread_arg *arg = &thread_args[i]; + + InitXLogPageRead(&arg->reader_data, archivedir, tli, segment_size, true, + consistent_read, false); + arg->reader_data.xlogsegno = segno_next; + arg->reader_data.thread_num = i + 1; + arg->process_record = process_record; + arg->startpoint = startpoint; + arg->endpoint = endpoint; + arg->endSegNo = endSegNo; + arg->inclusive_endpoint = inclusive_endpoint; + arg->got_target = false; + /* By default there is some error */ + arg->ret = 1; + + threads_need++; + segno_next++; + /* + * If we need to read less WAL segments than num_threads, create less + * threads. + */ + if (endSegNo != 0 && segno_next > endSegNo) + break; + GetXLogRecPtr(segno_next, 0, segment_size, startpoint); + } + + /* Run threads */ + thread_interrupted = false; + for (i = 0; i < threads_need; i++) + { + elog(VERBOSE, "Start WAL reader thread: %d", i + 1); + pthread_create(&threads[i], NULL, XLogThreadWorker, &thread_args[i]); + } + + /* Wait for threads */ + for (i = 0; i < threads_need; i++) + { + pthread_join(threads[i], NULL); + if (thread_args[i].ret == 1) + result = false; + } + + /* Release threads here, use thread_args only below */ + pfree(threads); + threads = NULL; + + if (last_rec) + { + /* + * We need to sort xlog_thread_arg array by xlogsegno to return latest + * possible record up to which restore is possible. We need to sort to + * detect failed thread between start segment and target segment. + * + * Loop stops on first failed thread. + */ + if (threads_need > 1) + qsort((void *) thread_args, threads_need, sizeof(xlog_thread_arg), + xlog_thread_arg_comp); + + for (i = 0; i < threads_need; i++) + { + XLogRecTarget *cur_rec; + + cur_rec = &thread_args[i].reader_data.cur_rec; + /* + * If we got the target return minimum possible record. + */ + if (segno_target > 0) + { + if (thread_args[i].got_target && + thread_args[i].reader_data.xlogsegno == segno_target) + { + *last_rec = *cur_rec; + break; + } + } + /* + * Else return maximum possible record up to which restore is + * possible. + */ + else if (last_rec->rec_lsn < cur_rec->rec_lsn) + *last_rec = *cur_rec; + + /* + * We reached failed thread, so stop here. We cannot use following + * WAL records after failed segment. + */ + if (thread_args[i].ret != 0) + break; + } + } + + pfree(thread_args); + + return result; +} + +/* + * WAL reader worker. + */ +void * +XLogThreadWorker(void *arg) +{ + xlog_thread_arg *thread_arg = (xlog_thread_arg *) arg; + XLogReaderData *reader_data = &thread_arg->reader_data; + XLogReaderState *xlogreader; + XLogSegNo nextSegNo = 0; + XLogRecPtr found; + uint32 prev_page_off = 0; + bool need_read = true; + +#if PG_VERSION_NUM >= 110000 + xlogreader = XLogReaderAllocate(wal_seg_size, &SimpleXLogPageRead, + reader_data); +#else + xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, reader_data); +#endif + if (xlogreader == NULL) + elog(ERROR, "Thread [%d]: out of memory", reader_data->thread_num); + xlogreader->system_identifier = instance_config.system_identifier; + + found = XLogFindNextRecord(xlogreader, thread_arg->startpoint); + + /* + * We get invalid WAL record pointer usually when WAL segment is absent or + * is corrupted. 
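The threading scheme above is one worker per WAL segment: each worker is seeded with its own starting segment before the threads run (later segments are claimed on the fly by SwitchThreadToNextWal()), each argument defaults to ret = 1 ("error") and is flipped to 0 only on success, and the joining side folds the per-thread results into a single boolean. A minimal pthreads sketch of that shape (the segment numbers and thread count are made up):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_THREADS 4

typedef struct
{
	int			thread_num;		/* 1-based, as in the log messages above */
	uint64_t	segno;			/* first segment this worker reads */
	int			ret;			/* 1 = error by default, 0 = success */
} worker_arg;

static void *
worker(void *arg)
{
	worker_arg *wa = (worker_arg *) arg;

	printf("Thread [%d]: reading segment %llu\n",
		   wa->thread_num, (unsigned long long) wa->segno);
	wa->ret = 0;				/* pretend the segment was processed */
	return NULL;
}

int
main(void)
{
	pthread_t	threads[NUM_THREADS];
	worker_arg	args[NUM_THREADS];
	uint64_t	segno_next = 100;
	int			i;
	int			result = 1;

	for (i = 0; i < NUM_THREADS; i++)
	{
		args[i].thread_num = i + 1;
		args[i].segno = segno_next++;	/* seed each worker with its own segment */
		args[i].ret = 1;
		pthread_create(&threads[i], NULL, worker, &args[i]);
	}

	for (i = 0; i < NUM_THREADS; i++)
	{
		pthread_join(threads[i], NULL);
		if (args[i].ret == 1)
			result = 0;					/* any failed thread fails the run */
	}

	printf("overall: %s\n", result ? "ok" : "failed");
	return 0;
}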
+ */ + if (XLogRecPtrIsInvalid(found)) + { + if (wal_consistent_read && XLogWaitForConsistency(xlogreader)) + need_read = false; + else + { + if (xlogreader->errormsg_buf[0] != '\0') + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X: %s", + reader_data->thread_num, + (uint32) (thread_arg->startpoint >> 32), + (uint32) (thread_arg->startpoint), + xlogreader->errormsg_buf); + else + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X", + reader_data->thread_num, + (uint32) (thread_arg->startpoint >> 32), + (uint32) (thread_arg->startpoint)); + PrintXLogCorruptionMsg(reader_data, ERROR); + } + } + + thread_arg->startpoint = found; + + elog(VERBOSE, "Thread [%d]: Starting LSN: %X/%X", + reader_data->thread_num, + (uint32) (thread_arg->startpoint >> 32), + (uint32) (thread_arg->startpoint)); + + while (need_read) + { + XLogRecord *record; + char *errormsg; + bool stop_reading = false; + + if (interrupted || thread_interrupted) + elog(ERROR, "Thread [%d]: Interrupted during WAL reading", + reader_data->thread_num); + + /* + * We need to switch to the next WAL segment after reading previous + * record. It may happen if we read contrecord. + */ + if (reader_data->need_switch && + !SwitchThreadToNextWal(xlogreader, thread_arg)) + break; + + record = XLogReadRecord(xlogreader, thread_arg->startpoint, &errormsg); + + if (record == NULL) + { + XLogRecPtr errptr; + + /* + * There is no record, try to switch to the next WAL segment. + * Usually SimpleXLogPageRead() does it by itself. But here we need + * to do it manually to support threads. + */ + if (reader_data->need_switch && errormsg == NULL) + { + if (SwitchThreadToNextWal(xlogreader, thread_arg)) + continue; + else + break; + } + + /* + * XLogWaitForConsistency() is normally used only with threads. + * Call it here for just in case. + */ + if (wal_consistent_read && XLogWaitForConsistency(xlogreader)) + break; + else if (wal_consistent_read) + { + XLogSegNo segno_report; + + pthread_lock(&wal_segment_mutex); + segno_report = segno_start + segnum_read; + pthread_mutex_unlock(&wal_segment_mutex); + + /* + * Report error message if this is the first corrupted WAL. + */ + if (reader_data->xlogsegno > segno_report) + return NULL; /* otherwise just stop the thread */ + } + + errptr = thread_arg->startpoint ? + thread_arg->startpoint : xlogreader->EndRecPtr; + + if (errormsg) + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X: %s", + reader_data->thread_num, + (uint32) (errptr >> 32), (uint32) (errptr), + errormsg); + else + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X", + reader_data->thread_num, + (uint32) (errptr >> 32), (uint32) (errptr)); + + /* In we failed to read record located at endpoint position, + * and endpoint is not inclusive, do not consider this as an error. + */ + if (!thread_arg->inclusive_endpoint && + errptr == thread_arg->endpoint) + { + elog(LOG, "Thread [%d]: Endpoint %X/%X is not inclusive, switch to the next timeline", + reader_data->thread_num, + (uint32) (thread_arg->endpoint >> 32), (uint32) (thread_arg->endpoint)); + break; + } + + /* + * If we don't have all WAL files from prev backup start_lsn to current + * start_lsn, we won't be able to build page map and PAGE backup will + * be incorrect. Stop it and throw an error. 
+ */ + PrintXLogCorruptionMsg(reader_data, ERROR); + } + + getRecordTimestamp(xlogreader, &reader_data->cur_rec.rec_time); + if (TransactionIdIsValid(XLogRecGetXid(xlogreader))) + reader_data->cur_rec.rec_xid = XLogRecGetXid(xlogreader); + reader_data->cur_rec.rec_lsn = xlogreader->ReadRecPtr; + + if (thread_arg->process_record) + thread_arg->process_record(xlogreader, reader_data, &stop_reading); + if (stop_reading) + { + thread_arg->got_target = true; + + pthread_lock(&wal_segment_mutex); + /* We should store least target segment number */ + if (segno_target == 0 || segno_target > reader_data->xlogsegno) + segno_target = reader_data->xlogsegno; + pthread_mutex_unlock(&wal_segment_mutex); + + break; + } + + /* + * Check if other thread got the target segment. Check it not very + * often, only every WAL page. + */ + if (wal_consistent_read && prev_page_off != 0 && + prev_page_off != reader_data->prev_page_off) + { + XLogSegNo segno; + + pthread_lock(&wal_segment_mutex); + segno = segno_target; + pthread_mutex_unlock(&wal_segment_mutex); + + if (segno != 0 && segno < reader_data->xlogsegno) + break; + } + prev_page_off = reader_data->prev_page_off; + + /* continue reading at next record */ + thread_arg->startpoint = InvalidXLogRecPtr; + + GetXLogSegNo(xlogreader->EndRecPtr, nextSegNo, wal_seg_size); + + if (thread_arg->endSegNo != 0 && + !XLogRecPtrIsInvalid(thread_arg->endpoint) && + /* + * Consider thread_arg->endSegNo and thread_arg->endpoint only if + * they are valid. + */ + xlogreader->ReadRecPtr >= thread_arg->endpoint && + nextSegNo >= thread_arg->endSegNo) + break; + } + + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); + + /* Extracting is successful */ + thread_arg->ret = 0; + return NULL; +} + +/* + * Do manual switch to the next WAL segment. + * + * Returns false if the reader reaches the end of a WAL segment list. + */ +static bool +SwitchThreadToNextWal(XLogReaderState *xlogreader, xlog_thread_arg *arg) +{ + XLogReaderData *reader_data; + XLogRecPtr found; + + reader_data = (XLogReaderData *) xlogreader->private_data; + reader_data->need_switch = false; + + /* Critical section */ + pthread_lock(&wal_segment_mutex); + Assert(segno_next); + reader_data->xlogsegno = segno_next; + segnum_read++; + segno_next++; + pthread_mutex_unlock(&wal_segment_mutex); + + /* We've reached the end */ + if (arg->endSegNo != 0 && reader_data->xlogsegno > arg->endSegNo) + return false; + + /* Adjust next record position */ + GetXLogRecPtr(reader_data->xlogsegno, 0, wal_seg_size, arg->startpoint); + /* We need to close previously opened file if it wasn't closed earlier */ + CleanupXLogPageRead(xlogreader); + /* Skip over the page header and contrecord if any */ + found = XLogFindNextRecord(xlogreader, arg->startpoint); + + /* + * We get invalid WAL record pointer usually when WAL segment is + * absent or is corrupted. + */ + if (XLogRecPtrIsInvalid(found)) + { + /* + * Check if we need to stop reading. We stop if other thread found a + * target segment. + */ + if (wal_consistent_read && XLogWaitForConsistency(xlogreader)) + return false; + else if (wal_consistent_read) + { + XLogSegNo segno_report; + + pthread_lock(&wal_segment_mutex); + segno_report = segno_start + segnum_read; + pthread_mutex_unlock(&wal_segment_mutex); + + /* + * Report error message if this is the first corrupted WAL. 
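The shared state used above (segno_target, segnum_read, segnum_corrupted) is only ever touched under wal_segment_mutex, and segno_target keeps the smallest segment number in which any thread found the target; the waiting side (XLogWaitForConsistency(), further down) polls that state every 500 ms. A compact sketch of the "record the minimum under a mutex" part (thread count and segment numbers are invented):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t seg_mutex = PTHREAD_MUTEX_INITIALIZER;
static uint64_t segno_target = 0;		/* 0 = no thread found the target yet */
static uint32_t segnum_read = 0;

static void
report_target(uint64_t my_segno)
{
	pthread_mutex_lock(&seg_mutex);
	segnum_read++;
	/* keep the smallest segment number in which the target was seen */
	if (segno_target == 0 || segno_target > my_segno)
		segno_target = my_segno;
	pthread_mutex_unlock(&seg_mutex);
}

static void *
worker(void *arg)
{
	report_target(*(uint64_t *) arg);
	return NULL;
}

int
main(void)
{
	uint64_t	segnos[3] = {105, 103, 107};
	pthread_t	threads[3];
	int			i;

	for (i = 0; i < 3; i++)
		pthread_create(&threads[i], NULL, worker, &segnos[i]);
	for (i = 0; i < 3; i++)
		pthread_join(threads[i], NULL);

	printf("target found first in segment %llu, %u segments read\n",
		   (unsigned long long) segno_target, (unsigned) segnum_read);
	return 0;
}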
+ */ + if (reader_data->xlogsegno > segno_report) + return false; /* otherwise just stop the thread */ + } + + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X", + reader_data->thread_num, + (uint32) (arg->startpoint >> 32), (uint32) (arg->startpoint)); + PrintXLogCorruptionMsg(reader_data, ERROR); + } + arg->startpoint = found; + + elog(VERBOSE, "Thread [%d]: Switched to LSN %X/%X", + reader_data->thread_num, + (uint32) (arg->startpoint >> 32), (uint32) (arg->startpoint)); + + return true; +} + +/* + * Wait for other threads since the current thread couldn't read its segment. + * We need to decide is it fail or not. + * + * Returns true if there is no failure and previous target segment was found. + * Otherwise return false. + */ +static bool +XLogWaitForConsistency(XLogReaderState *xlogreader) +{ + uint32 segnum_need; + XLogReaderData *reader_data =(XLogReaderData *) xlogreader->private_data; + bool log_message = true; + + segnum_need = reader_data->xlogsegno - segno_start; + while (true) + { + uint32 segnum_current_read; + XLogSegNo segno; + + if (log_message) + { + char xlogfname[MAXFNAMELEN]; + + GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno, + wal_seg_size); + + elog(VERBOSE, "Thread [%d]: Possible WAL corruption in %s. Wait for other threads to decide is this a failure", + reader_data->thread_num, xlogfname); + log_message = false; + } + + if (interrupted || thread_interrupted) + elog(ERROR, "Thread [%d]: Interrupted during WAL reading", + reader_data->thread_num); + + pthread_lock(&wal_segment_mutex); + segnum_current_read = segnum_read + segnum_corrupted; + segno = segno_target; + pthread_mutex_unlock(&wal_segment_mutex); + + /* Other threads read all previous segments and didn't find target */ + if (segnum_need <= segnum_current_read) + { + /* Mark current segment as corrupted */ + pthread_lock(&wal_segment_mutex); + segnum_corrupted++; + pthread_mutex_unlock(&wal_segment_mutex); + return false; + } + + if (segno != 0 && segno < reader_data->xlogsegno) + return true; + + pg_usleep(500000L); /* 500 ms */ + } + + /* We shouldn't reach it */ + return false; +} + /* * Cleanup after WAL segment reading. */ static void CleanupXLogPageRead(XLogReaderState *xlogreader) { - XLogPageReadPrivate *private_data; + XLogReaderData *reader_data; - private_data = (XLogPageReadPrivate *) xlogreader->private_data; - if (private_data->xlogfile >= 0) + reader_data = (XLogReaderData *) xlogreader->private_data; + if (reader_data->xlogfile >= 0) { - close(private_data->xlogfile); - private_data->xlogfile = -1; + fio_close(reader_data->xlogfile); + reader_data->xlogfile = -1; } #ifdef HAVE_LIBZ - else if (private_data->gz_xlogfile != NULL) + else if (reader_data->gz_xlogfile != NULL) { - gzclose(private_data->gz_xlogfile); - private_data->gz_xlogfile = NULL; + fio_gzclose(reader_data->gz_xlogfile); + reader_data->gz_xlogfile = NULL; } #endif - private_data->xlogexists = false; + reader_data->prev_page_off = 0; + reader_data->xlogexists = false; } static void -PrintXLogCorruptionMsg(XLogPageReadPrivate *private_data, int elevel) +PrintXLogCorruptionMsg(XLogReaderData *reader_data, int elevel) { - if (private_data->xlogpath[0] != 0) + if (reader_data->xlogpath[0] != 0) { /* * XLOG reader couldn't read WAL segment. * We throw a WARNING here to be able to update backup status. */ - if (!private_data->xlogexists) - elog(elevel, "WAL segment \"%s\" is absent", private_data->xlogpath); - else if (private_data->xlogfile != -1) - elog(elevel, "Possible WAL corruption. 
" + if (!reader_data->xlogexists) + elog(elevel, "Thread [%d]: WAL segment \"%s\" is absent", + reader_data->thread_num, reader_data->xlogpath); + else if (reader_data->xlogfile != -1) + elog(elevel, "Thread [%d]: Possible WAL corruption. " "Error has occured during reading WAL segment \"%s\"", - private_data->xlogpath); + reader_data->thread_num, reader_data->xlogpath); #ifdef HAVE_LIBZ - else if (private_data->gz_xlogpath != NULL) - elog(elevel, "Possible WAL corruption. " + else if (reader_data->gz_xlogfile != NULL) + elog(elevel, "Thread [%d]: Possible WAL corruption. " "Error has occured during reading WAL segment \"%s\"", - private_data->gz_xlogpath); + reader_data->thread_num, reader_data->gz_xlogpath); #endif } + else + { + /* Cannot tell what happened specifically */ + elog(elevel, "Thread [%d]: An error occured during WAL reading", + reader_data->thread_num); + } } /* * Extract information about blocks modified in this record. */ static void -extractPageInfo(XLogReaderState *record) +extractPageInfo(XLogReaderState *record, XLogReaderData *reader_data, + bool *stop_reading) { uint8 block_id; RmgrId rmid = XLogRecGetRmid(record); @@ -969,7 +1767,7 @@ extractPageInfo(XLogReaderState *record) if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno)) continue; - /* We only care about the main fork; others are copied in toto */ + /* We only care about the main fork; others are copied as is */ if (forknum != MAIN_FORKNUM) continue; @@ -977,6 +1775,27 @@ extractPageInfo(XLogReaderState *record) } } +/* + * Check the current read WAL record during validation. + */ +static void +validateXLogRecord(XLogReaderState *record, XLogReaderData *reader_data, + bool *stop_reading) +{ + /* Check target xid */ + if (TransactionIdIsValid(wal_target_xid) && + wal_target_xid == reader_data->cur_rec.rec_xid) + *stop_reading = true; + /* Check target time */ + else if (wal_target_time != 0 && + timestamptz_to_time_t(reader_data->cur_rec.rec_time) >= wal_target_time) + *stop_reading = true; + /* Check target lsn */ + else if (XRecOffIsValid(wal_target_lsn) && + reader_data->cur_rec.rec_lsn >= wal_target_lsn) + *stop_reading = true; +} + /* * Extract timestamp from WAL record. * @@ -1013,3 +1832,28 @@ getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime) return false; } +bool validate_wal_segment(TimeLineID tli, XLogSegNo segno, const char *prefetch_dir, uint32 wal_seg_size) +{ + XLogRecPtr startpoint; + XLogRecPtr endpoint; + + bool rc; + int tmp_num_threads = num_threads; + num_threads = 1; + + /* calculate startpoint and endpoint */ + GetXLogRecPtr(segno, 0, wal_seg_size, startpoint); + GetXLogRecPtr(segno+1, 0, wal_seg_size, endpoint); + + /* disable multi-threading */ + num_threads = 1; + + rc = RunXLogThreads(prefetch_dir, 0, InvalidTransactionId, + InvalidXLogRecPtr, tli, wal_seg_size, + startpoint, endpoint, false, NULL, NULL, true); + + num_threads = tmp_num_threads; + + return rc; +} + diff --git a/src/pg_probackup.c b/src/pg_probackup.c index a18ee5c35..f2aca75fd 100644 --- a/src/pg_probackup.c +++ b/src/pg_probackup.c @@ -3,29 +3,53 @@ * pg_probackup.c: Backup/Recovery manager for PostgreSQL. 
* * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ #include "pg_probackup.h" + +#include "pg_getopt.h" #include "streamutil.h" -#include "utils/thread.h" +#include "utils/file.h" -#include -#include -#include #include -#include -#include "pg_getopt.h" -const char *PROGRAM_VERSION = "2.0.18"; -const char *PROGRAM_URL = "https://github.com/postgrespro/pg_probackup"; -const char *PROGRAM_EMAIL = "https://github.com/postgrespro/pg_probackup/issues"; +#include "utils/configuration.h" +#include "utils/thread.h" +#include + +const char *PROGRAM_NAME = NULL; /* PROGRAM_NAME_FULL without .exe suffix + * if any */ +const char *PROGRAM_NAME_FULL = NULL; +const char *PROGRAM_FULL_PATH = NULL; +const char *PROGRAM_URL = "https://github.com/postgrespro/pg_probackup"; +const char *PROGRAM_EMAIL = "https://github.com/postgrespro/pg_probackup/issues"; + +typedef enum ProbackupSubcmd +{ + NO_CMD = 0, + INIT_CMD, + ADD_INSTANCE_CMD, + DELETE_INSTANCE_CMD, + ARCHIVE_PUSH_CMD, + ARCHIVE_GET_CMD, + BACKUP_CMD, + RESTORE_CMD, + VALIDATE_CMD, + DELETE_CMD, + MERGE_CMD, + SHOW_CMD, + SET_CONFIG_CMD, + SET_BACKUP_CMD, + SHOW_CONFIG_CMD, + CHECKDB_CMD +} ProbackupSubcmd; + /* directory options */ char *backup_path = NULL; -char *pgdata = NULL; /* * path or to the data files in the backup catalog * $BACKUP_PATH/backups/instance_name @@ -37,162 +61,219 @@ char backup_instance_path[MAXPGPATH]; */ char arclog_path[MAXPGPATH] = ""; +/* colon separated external directories list ("/path1:/path2") */ +char *externaldir = NULL; /* common options */ static char *backup_id_string = NULL; int num_threads = 1; bool stream_wal = false; +bool is_archive_cmd = false; +pid_t my_pid = 0; +__thread int my_thread_num = 1; bool progress = false; +bool no_sync = false; #if PG_VERSION_NUM >= 100000 char *replication_slot = NULL; #endif +bool temp_slot = false; /* backup options */ -bool backup_logs = false; -bool smooth_checkpoint; -bool is_remote_backup = false; -/* Wait timeout for WAL segment archiving */ -uint32 archive_timeout = ARCHIVE_TIMEOUT_DEFAULT; -const char *master_db = NULL; -const char *master_host = NULL; -const char *master_port= NULL; -const char *master_user = NULL; -uint32 replica_timeout = REPLICA_TIMEOUT_DEFAULT; - +bool backup_logs = false; +bool smooth_checkpoint; +char *remote_agent; +static char *backup_note = NULL; /* restore options */ -static char *target_time; -static char *target_xid; -static char *target_lsn; -static char *target_inclusive; +static char *target_time = NULL; +static char *target_xid = NULL; +static char *target_lsn = NULL; +static char *target_inclusive = NULL; static TimeLineID target_tli; +static char *target_stop; static bool target_immediate; static char *target_name = NULL; static char *target_action = NULL; +static char *primary_conninfo = NULL; + static pgRecoveryTarget *recovery_target_options = NULL; +static pgRestoreParams *restore_params = NULL; +time_t current_time = 0; bool restore_as_replica = false; -bool restore_no_validate = false; +bool no_validate = false; +IncrRestoreMode incremental_mode = INCR_NONE; + +bool skip_block_validation = false; +bool skip_external_dirs = false; + +/* array for datnames, provided via db-include and db-exclude */ +static parray 
*datname_exclude_list = NULL; +static parray *datname_include_list = NULL; + +/* checkdb options */ +bool need_amcheck = false; +bool heapallindexed = false; +bool amcheck_parent = false; /* delete options */ bool delete_wal = false; bool delete_expired = false; -bool apply_to_all = false; -bool force_delete = false; - -/* retention options */ -uint32 retention_redundancy = 0; -uint32 retention_window = 0; - +bool merge_expired = false; +bool force = false; +bool dry_run = false; +static char *delete_status = NULL; /* compression options */ -CompressAlg compress_alg = COMPRESS_ALG_DEFAULT; -int compress_level = COMPRESS_LEVEL_DEFAULT; bool compress_shortcut = false; - /* other options */ char *instance_name; -uint64 system_identifier = 0; /* archive push options */ +int batch_size = 1; static char *wal_file_path; static char *wal_file_name; -static bool file_overwrite = false; +static bool file_overwrite = false; +static bool no_ready_rename = false; + +/* archive get options */ +static char *prefetch_dir; +bool no_validate_wal = false; /* show options */ ShowFormat show_format = SHOW_PLAIN; +bool show_archive = false; + +/* set-backup options */ +int64 ttl = -1; +static char *expire_time_string = NULL; +static pgSetBackupParams *set_backup_params = NULL; /* current settings */ pgBackup current; -ProbackupSubcmd backup_subcmd = NO_CMD; +static ProbackupSubcmd backup_subcmd = NO_CMD; static bool help_opt = false; -static void opt_backup_mode(pgut_option *opt, const char *arg); -static void opt_log_level_console(pgut_option *opt, const char *arg); -static void opt_log_level_file(pgut_option *opt, const char *arg); -static void opt_compress_alg(pgut_option *opt, const char *arg); -static void opt_show_format(pgut_option *opt, const char *arg); +static void opt_incr_restore_mode(ConfigOption *opt, const char *arg); +static void opt_backup_mode(ConfigOption *opt, const char *arg); +static void opt_show_format(ConfigOption *opt, const char *arg); static void compress_init(void); -static pgut_option options[] = +static void opt_datname_exclude_list(ConfigOption *opt, const char *arg); +static void opt_datname_include_list(ConfigOption *opt, const char *arg); + +/* + * Short name should be non-printable ASCII character. + * Use values between 128 and 255. 
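The comment above about short names between 128 and 255 follows the usual getopt_long() convention: options that have no real single-character form get a val above the printable ASCII range, so the dispatch switch can still tell them apart without colliding with genuine short options. The patch routes everything through its own ConfigOption/config_get_opt machinery; the sketch below only shows the underlying convention the numbering mirrors, reusing a few codes from the table for illustration:

#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
	int			num_threads = 1;
	int			overwrite = 0;
	int			batch_size = 1;
	int			c;

	/* long-only options carry codes >= 128, real short options keep their char */
	static const struct option longopts[] = {
		{"threads",    required_argument, NULL, 'j'},
		{"overwrite",  no_argument,       NULL, 152},
		{"batch-size", required_argument, NULL, 162},
		{NULL, 0, NULL, 0}
	};

	while ((c = getopt_long(argc, argv, "j:", longopts, NULL)) != -1)
	{
		switch (c)
		{
			case 'j':
				num_threads = atoi(optarg);
				break;
			case 152:
				overwrite = 1;
				break;
			case 162:
				batch_size = atoi(optarg);
				break;
			default:
				return 1;
		}
	}

	printf("threads=%d overwrite=%d batch-size=%d\n",
		   num_threads, overwrite, batch_size);
	return 0;
}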
+ */ +static ConfigOption cmd_options[] = { /* directory options */ - { 'b', 1, "help", &help_opt, SOURCE_CMDLINE }, - { 's', 'D', "pgdata", &pgdata, SOURCE_CMDLINE }, - { 's', 'B', "backup-path", &backup_path, SOURCE_CMDLINE }, + { 'b', 130, "help", &help_opt, SOURCE_CMD_STRICT }, + { 's', 'B', "backup-path", &backup_path, SOURCE_CMD_STRICT }, /* common options */ - { 'u', 'j', "threads", &num_threads, SOURCE_CMDLINE }, - { 'b', 2, "stream", &stream_wal, SOURCE_CMDLINE }, - { 'b', 3, "progress", &progress, SOURCE_CMDLINE }, - { 's', 'i', "backup-id", &backup_id_string, SOURCE_CMDLINE }, + { 'u', 'j', "threads", &num_threads, SOURCE_CMD_STRICT }, + { 'b', 131, "stream", &stream_wal, SOURCE_CMD_STRICT }, + { 'b', 132, "progress", &progress, SOURCE_CMD_STRICT }, + { 's', 'i', "backup-id", &backup_id_string, SOURCE_CMD_STRICT }, + { 'b', 133, "no-sync", &no_sync, SOURCE_CMD_STRICT }, /* backup options */ - { 'b', 10, "backup-pg-log", &backup_logs, SOURCE_CMDLINE }, - { 'f', 'b', "backup-mode", opt_backup_mode, SOURCE_CMDLINE }, - { 'b', 'C', "smooth-checkpoint", &smooth_checkpoint, SOURCE_CMDLINE }, - { 's', 'S', "slot", &replication_slot, SOURCE_CMDLINE }, - { 'u', 11, "archive-timeout", &archive_timeout, SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_S }, - { 'b', 12, "delete-wal", &delete_wal, SOURCE_CMDLINE }, - { 'b', 13, "delete-expired", &delete_expired, SOURCE_CMDLINE }, - { 's', 14, "master-db", &master_db, SOURCE_CMDLINE, }, - { 's', 15, "master-host", &master_host, SOURCE_CMDLINE, }, - { 's', 16, "master-port", &master_port, SOURCE_CMDLINE, }, - { 's', 17, "master-user", &master_user, SOURCE_CMDLINE, }, - { 'u', 18, "replica-timeout", &replica_timeout, SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_S }, - /* TODO not completed feature. Make it unavailiable from user level - { 'b', 18, "remote", &is_remote_backup, SOURCE_CMDLINE, }, */ + { 'b', 180, "backup-pg-log", &backup_logs, SOURCE_CMD_STRICT }, + { 'f', 'b', "backup-mode", opt_backup_mode, SOURCE_CMD_STRICT }, + { 'b', 'C', "smooth-checkpoint", &smooth_checkpoint, SOURCE_CMD_STRICT }, + { 's', 'S', "slot", &replication_slot, SOURCE_CMD_STRICT }, + { 'b', 181, "temp-slot", &temp_slot, SOURCE_CMD_STRICT }, + { 'b', 182, "delete-wal", &delete_wal, SOURCE_CMD_STRICT }, + { 'b', 183, "delete-expired", &delete_expired, SOURCE_CMD_STRICT }, + { 'b', 184, "merge-expired", &merge_expired, SOURCE_CMD_STRICT }, + { 'b', 185, "dry-run", &dry_run, SOURCE_CMD_STRICT }, + { 's', 238, "note", &backup_note, SOURCE_CMD_STRICT }, /* restore options */ - { 's', 20, "time", &target_time, SOURCE_CMDLINE }, - { 's', 21, "xid", &target_xid, SOURCE_CMDLINE }, - { 's', 22, "inclusive", &target_inclusive, SOURCE_CMDLINE }, - { 'u', 23, "timeline", &target_tli, SOURCE_CMDLINE }, - { 'f', 'T', "tablespace-mapping", opt_tablespace_map, SOURCE_CMDLINE }, - { 'b', 24, "immediate", &target_immediate, SOURCE_CMDLINE }, - { 's', 25, "recovery-target-name", &target_name, SOURCE_CMDLINE }, - { 's', 26, "recovery-target-action", &target_action, SOURCE_CMDLINE }, - { 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMDLINE }, - { 'b', 27, "no-validate", &restore_no_validate, SOURCE_CMDLINE }, - { 's', 28, "lsn", &target_lsn, SOURCE_CMDLINE }, + { 's', 136, "recovery-target-time", &target_time, SOURCE_CMD_STRICT }, + { 's', 137, "recovery-target-xid", &target_xid, SOURCE_CMD_STRICT }, + { 's', 144, "recovery-target-lsn", &target_lsn, SOURCE_CMD_STRICT }, + { 's', 138, "recovery-target-inclusive", &target_inclusive, SOURCE_CMD_STRICT }, + { 'u', 139, 
"recovery-target-timeline", &target_tli, SOURCE_CMD_STRICT }, + { 's', 157, "recovery-target", &target_stop, SOURCE_CMD_STRICT }, + { 'f', 'T', "tablespace-mapping", opt_tablespace_map, SOURCE_CMD_STRICT }, + { 'f', 155, "external-mapping", opt_externaldir_map, SOURCE_CMD_STRICT }, + { 's', 141, "recovery-target-name", &target_name, SOURCE_CMD_STRICT }, + { 's', 142, "recovery-target-action", &target_action, SOURCE_CMD_STRICT }, + { 'b', 143, "no-validate", &no_validate, SOURCE_CMD_STRICT }, + { 'b', 154, "skip-block-validation", &skip_block_validation, SOURCE_CMD_STRICT }, + { 'b', 156, "skip-external-dirs", &skip_external_dirs, SOURCE_CMD_STRICT }, + { 'f', 158, "db-include", opt_datname_include_list, SOURCE_CMD_STRICT }, + { 'f', 159, "db-exclude", opt_datname_exclude_list, SOURCE_CMD_STRICT }, + { 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMD_STRICT }, + { 's', 160, "primary-conninfo", &primary_conninfo, SOURCE_CMD_STRICT }, + { 's', 'S', "primary-slot-name",&replication_slot, SOURCE_CMD_STRICT }, + { 'f', 'I', "incremental-mode", opt_incr_restore_mode, SOURCE_CMD_STRICT }, + /* checkdb options */ + { 'b', 195, "amcheck", &need_amcheck, SOURCE_CMD_STRICT }, + { 'b', 196, "heapallindexed", &heapallindexed, SOURCE_CMD_STRICT }, + { 'b', 197, "parent", &amcheck_parent, SOURCE_CMD_STRICT }, /* delete options */ - { 'b', 130, "wal", &delete_wal, SOURCE_CMDLINE }, - { 'b', 131, "expired", &delete_expired, SOURCE_CMDLINE }, - { 'b', 132, "all", &apply_to_all, SOURCE_CMDLINE }, + { 'b', 145, "wal", &delete_wal, SOURCE_CMD_STRICT }, + { 'b', 146, "expired", &delete_expired, SOURCE_CMD_STRICT }, + { 's', 172, "status", &delete_status, SOURCE_CMD_STRICT }, + /* TODO not implemented yet */ - { 'b', 133, "force", &force_delete, SOURCE_CMDLINE }, - /* retention options */ - { 'u', 134, "retention-redundancy", &retention_redundancy, SOURCE_CMDLINE }, - { 'u', 135, "retention-window", &retention_window, SOURCE_CMDLINE }, + { 'b', 147, "force", &force, SOURCE_CMD_STRICT }, /* compression options */ - { 'f', 136, "compress-algorithm", opt_compress_alg, SOURCE_CMDLINE }, - { 'u', 137, "compress-level", &compress_level, SOURCE_CMDLINE }, - { 'b', 138, "compress", &compress_shortcut, SOURCE_CMDLINE }, - /* logging options */ - { 'f', 140, "log-level-console", opt_log_level_console, SOURCE_CMDLINE }, - { 'f', 141, "log-level-file", opt_log_level_file, SOURCE_CMDLINE }, - { 's', 142, "log-filename", &log_filename, SOURCE_CMDLINE }, - { 's', 143, "error-log-filename", &error_log_filename, SOURCE_CMDLINE }, - { 's', 144, "log-directory", &log_directory, SOURCE_CMDLINE }, - { 'u', 145, "log-rotation-size", &log_rotation_size, SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_KB }, - { 'u', 146, "log-rotation-age", &log_rotation_age, SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MIN }, + { 'b', 148, "compress", &compress_shortcut, SOURCE_CMD_STRICT }, /* connection options */ - { 's', 'd', "pgdatabase", &pgut_dbname, SOURCE_CMDLINE }, - { 's', 'h', "pghost", &host, SOURCE_CMDLINE }, - { 's', 'p', "pgport", &port, SOURCE_CMDLINE }, - { 's', 'U', "pguser", &username, SOURCE_CMDLINE }, - { 'B', 'w', "no-password", &prompt_password, SOURCE_CMDLINE }, - { 'b', 'W', "password", &force_password, SOURCE_CMDLINE }, + { 'B', 'w', "no-password", &prompt_password, SOURCE_CMD_STRICT }, + { 'b', 'W', "password", &force_password, SOURCE_CMD_STRICT }, /* other options */ - { 'U', 150, "system-identifier", &system_identifier, SOURCE_FILE_STRICT }, - { 's', 151, "instance", &instance_name, SOURCE_CMDLINE }, + { 's', 
149, "instance", &instance_name, SOURCE_CMD_STRICT }, /* archive-push options */ - { 's', 160, "wal-file-path", &wal_file_path, SOURCE_CMDLINE }, - { 's', 161, "wal-file-name", &wal_file_name, SOURCE_CMDLINE }, - { 'b', 162, "overwrite", &file_overwrite, SOURCE_CMDLINE }, + { 's', 150, "wal-file-path", &wal_file_path, SOURCE_CMD_STRICT }, + { 's', 151, "wal-file-name", &wal_file_name, SOURCE_CMD_STRICT }, + { 'b', 152, "overwrite", &file_overwrite, SOURCE_CMD_STRICT }, + { 'b', 153, "no-ready-rename", &no_ready_rename, SOURCE_CMD_STRICT }, + { 'i', 162, "batch-size", &batch_size, SOURCE_CMD_STRICT }, + /* archive-get options */ + { 's', 163, "prefetch-dir", &prefetch_dir, SOURCE_CMD_STRICT }, + { 'b', 164, "no-validate-wal", &no_validate_wal, SOURCE_CMD_STRICT }, /* show options */ - { 'f', 170, "format", opt_show_format, SOURCE_CMDLINE }, + { 'f', 165, "format", opt_show_format, SOURCE_CMD_STRICT }, + { 'b', 166, "archive", &show_archive, SOURCE_CMD_STRICT }, + /* set-backup options */ + { 'I', 170, "ttl", &ttl, SOURCE_CMD_STRICT, SOURCE_DEFAULT, 0, OPTION_UNIT_S, option_get_value}, + { 's', 171, "expire-time", &expire_time_string, SOURCE_CMD_STRICT }, + + /* options for backward compatibility + * TODO: remove in 3.0.0 + */ + { 's', 136, "time", &target_time, SOURCE_CMD_STRICT }, + { 's', 137, "xid", &target_xid, SOURCE_CMD_STRICT }, + { 's', 138, "inclusive", &target_inclusive, SOURCE_CMD_STRICT }, + { 'u', 139, "timeline", &target_tli, SOURCE_CMD_STRICT }, + { 's', 144, "lsn", &target_lsn, SOURCE_CMD_STRICT }, + { 'b', 140, "immediate", &target_immediate, SOURCE_CMD_STRICT }, + { 0 } }; +static void +setMyLocation(void) +{ + +#ifdef WIN32 + if (IsSshProtocol()) + elog(ERROR, "Currently remote operations on Windows are not supported"); +#endif + + MyLocation = IsSshProtocol() + ? (backup_subcmd == ARCHIVE_PUSH_CMD || backup_subcmd == ARCHIVE_GET_CMD) + ? FIO_DB_HOST + : (backup_subcmd == BACKUP_CMD || backup_subcmd == RESTORE_CMD || backup_subcmd == ADD_INSTANCE_CMD) + ? FIO_BACKUP_HOST + : FIO_LOCAL_HOST + : FIO_LOCAL_HOST; +} + /* * Entry point of pg_probackup command. */ @@ -201,15 +282,31 @@ main(int argc, char *argv[]) { char *command = NULL, *command_name; - /* Check if backup_path is directory. */ - struct stat stat_buf; - int rc; - /* initialize configuration */ + PROGRAM_NAME_FULL = argv[0]; + + /* Initialize current backup */ pgBackupInit(¤t); + /* Initialize current instance configuration */ + init_config(&instance_config, instance_name); + PROGRAM_NAME = get_progname(argv[0]); - set_pglocale_pgservice(argv[0], "pgscripts"); + PROGRAM_FULL_PATH = palloc0(MAXPGPATH); + + /* Get current time */ + current_time = time(NULL); + + my_pid = getpid(); + //set_pglocale_pgservice(argv[0], "pgscripts"); + +#if PG_VERSION_NUM >= 110000 + /* + * Reset WAL segment size, we will retreive it using RetrieveWalSegSize() + * later. + */ + WalSegSz = 0; +#endif /* * Save main thread's tid. It is used call exit() in case of errors. 
@@ -243,8 +340,31 @@ main(int argc, char *argv[]) backup_subcmd = SHOW_CMD; else if (strcmp(argv[1], "set-config") == 0) backup_subcmd = SET_CONFIG_CMD; + else if (strcmp(argv[1], "set-backup") == 0) + backup_subcmd = SET_BACKUP_CMD; else if (strcmp(argv[1], "show-config") == 0) backup_subcmd = SHOW_CONFIG_CMD; + else if (strcmp(argv[1], "checkdb") == 0) + backup_subcmd = CHECKDB_CMD; +#ifdef WIN32 + else if (strcmp(argv[1], "ssh") == 0) + launch_ssh(argv); +#endif + else if (strcmp(argv[1], "agent") == 0) + { + /* 'No forward compatibility' sanity: + * /old/binary -> ssh execute -> /newer/binary agent version_num + * If we are executed as an agent for older binary, then exit with error + */ + if (argc > 2) + { + elog(ERROR, "Version mismatch, pg_probackup binary with version '%s' " + "is launched as an agent for pg_probackup binary with version '%s'", + PROGRAM_VERSION, argv[2]); + } + fio_communicate(STDIN_FILENO, STDOUT_FILENO); + return 0; + } else if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0 || strcmp(argv[1], "help") == 0) @@ -259,11 +379,11 @@ main(int argc, char *argv[]) || strcmp(argv[1], "-V") == 0) { #ifdef PGPRO_VERSION - fprintf(stderr, "%s %s (Postgres Pro %s %s)\n", + fprintf(stdout, "%s %s (Postgres Pro %s %s)\n", PROGRAM_NAME, PROGRAM_VERSION, PGPRO_VERSION, PGPRO_EDITION); #else - fprintf(stderr, "%s %s (PostgreSQL %s)\n", + fprintf(stdout, "%s %s (PostgreSQL %s)\n", PROGRAM_NAME, PROGRAM_VERSION, PG_VERSION); #endif exit(0); @@ -279,12 +399,15 @@ main(int argc, char *argv[]) * Make command string before getopt_long() will call. It permutes the * content of argv. */ + /* TODO why do we do that only for some commands? */ command_name = pstrdup(argv[1]); if (backup_subcmd == BACKUP_CMD || backup_subcmd == RESTORE_CMD || backup_subcmd == VALIDATE_CMD || backup_subcmd == DELETE_CMD || - backup_subcmd == MERGE_CMD) + backup_subcmd == MERGE_CMD || + backup_subcmd == SET_CONFIG_CMD || + backup_subcmd == SET_BACKUP_CMD) { int i, len = 0, @@ -312,13 +435,15 @@ main(int argc, char *argv[]) } optind += 1; - /* Parse command line arguments */ - pgut_getopt(argc, argv, options); + /* Parse command line only arguments */ + config_get_opt(argc, argv, cmd_options, instance_options); + + pgut_init(); if (help_opt) help_command(command_name); - /* backup_path is required for all pg_probackup commands except help */ + /* backup_path is required for all pg_probackup commands except help and checkdb */ if (backup_path == NULL) { /* @@ -326,86 +451,212 @@ main(int argc, char *argv[]) * from environment variable */ backup_path = getenv("BACKUP_PATH"); - if (backup_path == NULL) + if (backup_path == NULL && backup_subcmd != CHECKDB_CMD) elog(ERROR, "required parameter not specified: BACKUP_PATH (-B, --backup-path)"); } - canonicalize_path(backup_path); - - /* Ensure that backup_path is an absolute path */ - if (!is_absolute_path(backup_path)) - elog(ERROR, "-B, --backup-path must be an absolute path"); - /* Ensure that backup_path is a path to a directory */ - rc = stat(backup_path, &stat_buf); - if (rc != -1 && !S_ISDIR(stat_buf.st_mode)) - elog(ERROR, "-B, --backup-path must be a path to directory"); + setMyLocation(); - /* command was initialized for a few commands */ - if (command) + if (backup_path != NULL) { - elog_file(INFO, "command: %s", command); + canonicalize_path(backup_path); - pfree(command); - command = NULL; + /* Ensure that backup_path is an absolute path */ + if (!is_absolute_path(backup_path)) + elog(ERROR, "-B, --backup-path must be an absolute path"); } 
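The fallback above is: take -B/--backup-path from the command line, otherwise from the BACKUP_PATH environment variable, and insist on an absolute path. A tiny standalone sketch of that check, assuming POSIX where an absolute path simply starts with '/' (the real code goes through canonicalize_path()/is_absolute_path()):

#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
	const char *backup_path = (argc > 1) ? argv[1] : getenv("BACKUP_PATH");

	if (backup_path == NULL)
	{
		fprintf(stderr, "required parameter not specified: BACKUP_PATH (-B, --backup-path)\n");
		return 1;
	}

	if (backup_path[0] != '/')
	{
		fprintf(stderr, "-B, --backup-path must be an absolute path\n");
		return 1;
	}

	printf("using backup catalog at \"%s\"\n", backup_path);
	return 0;
}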
- /* Option --instance is required for all commands except init and show */ - if (backup_subcmd != INIT_CMD && backup_subcmd != SHOW_CMD && - backup_subcmd != VALIDATE_CMD) + /* Ensure that backup_path is an absolute path */ + if (backup_path && !is_absolute_path(backup_path)) + elog(ERROR, "-B, --backup-path must be an absolute path"); + + + /* + * Option --instance is required for all commands except + * init, show, checkdb and validate + */ + if (instance_name == NULL) { - if (instance_name == NULL) + if (backup_subcmd != INIT_CMD && backup_subcmd != SHOW_CMD && + backup_subcmd != VALIDATE_CMD && backup_subcmd != CHECKDB_CMD) elog(ERROR, "required parameter not specified: --instance"); } + else + /* Set instance name */ + instance_config.name = pgut_strdup(instance_name); /* * If --instance option was passed, construct paths for backup data and * xlog files of this backup instance. */ - if (instance_name) + if ((backup_path != NULL) && instance_name) { + /* + * Fill global variables used to generate pathes inside the instance's + * backup catalog. + * TODO replace global variables with InstanceConfig structure fields + */ sprintf(backup_instance_path, "%s/%s/%s", backup_path, BACKUPS_DIR, instance_name); sprintf(arclog_path, "%s/%s/%s", backup_path, "wal", instance_name); + /* + * Fill InstanceConfig structure fields used to generate pathes inside + * the instance's backup catalog. + * TODO continue refactoring to use these fields instead of global vars + */ + sprintf(instance_config.backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); + canonicalize_path(instance_config.backup_instance_path); + + sprintf(instance_config.arclog_path, "%s/%s/%s", + backup_path, "wal", instance_name); + canonicalize_path(instance_config.arclog_path); + /* * Ensure that requested backup instance exists. - * for all commands except init, which doesn't take this parameter - * and add-instance which creates new instance. + * for all commands except init, which doesn't take this parameter, + * add-instance, which creates new instance, + * and archive-get, which just do not require it at this point */ - if (backup_subcmd != INIT_CMD && backup_subcmd != ADD_INSTANCE_CMD) + if (backup_subcmd != INIT_CMD && backup_subcmd != ADD_INSTANCE_CMD && + backup_subcmd != ARCHIVE_GET_CMD) { - if (access(backup_instance_path, F_OK) != 0) + struct stat st; + + if (fio_stat(backup_instance_path, &st, true, FIO_BACKUP_HOST) != 0) + { + elog(WARNING, "Failed to access directory \"%s\": %s", + backup_instance_path, strerror(errno)); + + // TODO: redundant message, should we get rid of it? elog(ERROR, "Instance '%s' does not exist in this backup catalog", instance_name); + } + else + { + /* Ensure that backup_path is a path to a directory */ + if (!S_ISDIR(st.st_mode)) + elog(ERROR, "-B, --backup-path must be a path to directory"); + } } } /* - * Read options from env variables or from config file, - * unless we're going to set them via set-config. + * We read options from command line, now we need to read them from + * configuration file since we got backup path and instance name. + * For some commands an instance option isn't required, see above. 
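Per instance, the catalog layout built above is $BACKUP_PATH/backups/<instance> for backups and $BACKUP_PATH/wal/<instance> for archived WAL, and the instance directory must already exist and be a directory. A condensed local-only sketch, using plain stat() instead of fio_stat() (the paths and instance name are illustrative):

#include <stdio.h>
#include <sys/stat.h>

int
main(void)
{
	const char *backup_path = "/backup";		/* illustrative */
	const char *instance_name = "node1";		/* illustrative */
	char		backup_instance_path[1024];
	char		arclog_path[1024];
	struct stat st;

	snprintf(backup_instance_path, sizeof(backup_instance_path),
			 "%s/%s/%s", backup_path, "backups", instance_name);
	snprintf(arclog_path, sizeof(arclog_path),
			 "%s/%s/%s", backup_path, "wal", instance_name);

	if (stat(backup_instance_path, &st) != 0)
	{
		fprintf(stderr, "Instance '%s' does not exist in this backup catalog\n",
				instance_name);
		return 1;
	}
	if (!S_ISDIR(st.st_mode))
	{
		fprintf(stderr, "\"%s\" is not a directory\n", backup_instance_path);
		return 1;
	}

	printf("backups: %s\nwal:     %s\n", backup_instance_path, arclog_path);
	return 0;
}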
*/ - if (instance_name && backup_subcmd != SET_CONFIG_CMD) + if (instance_name) { char path[MAXPGPATH]; - /* Read environment variables */ - pgut_getopt_env(options); + config_get_opt_env(instance_options); /* Read options from configuration file */ - join_path_components(path, backup_instance_path, BACKUP_CATALOG_CONF_FILE); - pgut_readopt(path, options, ERROR); + if (backup_subcmd != ADD_INSTANCE_CMD && + backup_subcmd != ARCHIVE_GET_CMD) + { + join_path_components(path, backup_instance_path, + BACKUP_CATALOG_CONF_FILE); + + if (backup_subcmd == CHECKDB_CMD) + config_read_opt(path, instance_options, ERROR, true, true); + else + config_read_opt(path, instance_options, ERROR, true, false); + } + setMyLocation(); + } + + /* + * Disable logging into file for archive-push and archive-get. + * Note, that we should NOT use fio_is_remote() here, + * because it will launch ssh connection and we do not + * want it, because it will kill archive-get prefetch + * performance. + * + * TODO: make logging into file possible via ssh + */ + if (fio_is_remote_simple(FIO_BACKUP_HOST) && + (backup_subcmd == ARCHIVE_GET_CMD || + backup_subcmd == ARCHIVE_PUSH_CMD)) + { + instance_config.logger.log_level_file = LOG_OFF; + is_archive_cmd = true; } + + /* Just read environment variables */ + if (backup_path == NULL && backup_subcmd == CHECKDB_CMD) + config_get_opt_env(instance_options); + + /* Sanity for checkdb, if backup_dir is provided but pgdata and instance are not */ + if (backup_subcmd == CHECKDB_CMD && + backup_path != NULL && + instance_name == NULL && + instance_config.pgdata == NULL) + elog(ERROR, "required parameter not specified: --instance"); + + /* Usually checkdb for file logging requires log_directory + * to be specified explicitly, but if backup_dir and instance name are provided, + * checkdb can use the usual default values or values from config + */ + if (backup_subcmd == CHECKDB_CMD && + (instance_config.logger.log_level_file != LOG_OFF && + instance_config.logger.log_directory == NULL) && + (!instance_config.pgdata || !instance_name)) + elog(ERROR, "Cannot save checkdb logs to a file. " + "You must specify --log-directory option when running checkdb with " + "--log-level-file option enabled."); + /* Initialize logger */ - init_logger(backup_path); + init_logger(backup_path, &instance_config.logger); + + /* command was initialized for a few commands */ + if (command) + { + elog_file(INFO, "command: %s", command); + + pfree(command); + command = NULL; + } + + /* For archive-push and archive-get skip full path lookup */ + if ((backup_subcmd != ARCHIVE_GET_CMD && + backup_subcmd != ARCHIVE_PUSH_CMD) && + (find_my_exec(argv[0],(char *) PROGRAM_FULL_PATH) < 0)) + { + PROGRAM_FULL_PATH = NULL; + elog(WARNING, "%s: could not find a full path to executable", PROGRAM_NAME); + } /* * We have read pgdata path from command line or from configuration file. * Ensure that pgdata is an absolute path. 
*/ - if (pgdata != NULL && !is_absolute_path(pgdata)) + if (instance_config.pgdata != NULL) + canonicalize_path(instance_config.pgdata); + if (instance_config.pgdata != NULL && + !is_absolute_path(instance_config.pgdata)) elog(ERROR, "-D, --pgdata must be an absolute path"); +#if PG_VERSION_NUM >= 110000 + /* Check xlog-seg-size option */ + if (instance_name && + backup_subcmd != INIT_CMD && + backup_subcmd != ADD_INSTANCE_CMD && backup_subcmd != SET_CONFIG_CMD && + !IsValidWalSegSize(instance_config.xlog_seg_size)) + { + /* If we are working with instance of PG<11 using PG11 binary, + * then xlog_seg_size is equal to zero. Manually set it to 16MB. + */ + if (instance_config.xlog_seg_size == 0) + instance_config.xlog_seg_size = DEFAULT_XLOG_SEG_SIZE; + else + elog(ERROR, "Invalid WAL segment size %u", instance_config.xlog_seg_size); + } +#endif + /* Sanity check of --backup-id option */ if (backup_id_string != NULL) { @@ -413,6 +664,7 @@ main(int argc, char *argv[]) backup_subcmd != VALIDATE_CMD && backup_subcmd != DELETE_CMD && backup_subcmd != MERGE_CMD && + backup_subcmd != SET_BACKUP_CMD && backup_subcmd != SHOW_CMD) elog(ERROR, "Cannot use -i (--backup-id) option together with the \"%s\" command", command_name); @@ -422,97 +674,202 @@ main(int argc, char *argv[]) elog(ERROR, "Invalid backup-id \"%s\"", backup_id_string); } - /* Setup stream options. They are used in streamutil.c. */ - if (host != NULL) - dbhost = pstrdup(host); - if (port != NULL) - dbport = pstrdup(port); - if (username != NULL) - dbuser = pstrdup(username); - - /* setup exclusion list for file search */ - if (!backup_logs) - { - int i; + if (!instance_config.conn_opt.pghost && instance_config.remote.host) + instance_config.conn_opt.pghost = instance_config.remote.host; - for (i = 0; pgdata_exclude_dir[i]; i++); /* find first empty slot */ + /* Setup stream options. They are used in streamutil.c. */ + if (instance_config.conn_opt.pghost != NULL) + dbhost = pstrdup(instance_config.conn_opt.pghost); + if (instance_config.conn_opt.pgport != NULL) + dbport = pstrdup(instance_config.conn_opt.pgport); + if (instance_config.conn_opt.pguser != NULL) + dbuser = pstrdup(instance_config.conn_opt.pguser); - /* Set 'pg_log' in first empty slot */ - pgdata_exclude_dir[i] = "pg_log"; + if (backup_subcmd == VALIDATE_CMD || backup_subcmd == RESTORE_CMD) + { + /* + * Parse all recovery target options into recovery_target_options + * structure. + */ + recovery_target_options = + parseRecoveryTargetOptions(target_time, target_xid, + target_inclusive, target_tli, target_lsn, + (target_stop != NULL) ? target_stop : + (target_immediate) ? 
"immediate" : NULL, + target_name, target_action); + + if (force && backup_subcmd != RESTORE_CMD) + elog(ERROR, "You cannot specify \"--force\" flag with the \"%s\" command", + command_name); + + if (force) + no_validate = true; + + if (replication_slot != NULL) + restore_as_replica = true; + + /* keep all params in one structure */ + restore_params = pgut_new(pgRestoreParams); + restore_params->is_restore = (backup_subcmd == RESTORE_CMD); + restore_params->force = force; + restore_params->no_validate = no_validate; + restore_params->restore_as_replica = restore_as_replica; + restore_params->primary_slot_name = replication_slot; + restore_params->skip_block_validation = skip_block_validation; + restore_params->skip_external_dirs = skip_external_dirs; + restore_params->partial_db_list = NULL; + restore_params->partial_restore_type = NONE; + restore_params->primary_conninfo = primary_conninfo; + restore_params->incremental_mode = incremental_mode; + + /* handle partial restore parameters */ + if (datname_exclude_list && datname_include_list) + elog(ERROR, "You cannot specify '--db-include' and '--db-exclude' together"); + + if (datname_exclude_list) + { + restore_params->partial_restore_type = EXCLUDE; + restore_params->partial_db_list = datname_exclude_list; + } + else if (datname_include_list) + { + restore_params->partial_restore_type = INCLUDE; + restore_params->partial_db_list = datname_include_list; + } } - if (backup_subcmd == VALIDATE_CMD || backup_subcmd == RESTORE_CMD) + /* + * Parse set-backup options into set_backup_params structure. + */ + if (backup_subcmd == SET_BACKUP_CMD || backup_subcmd == BACKUP_CMD) { - /* parse all recovery target options into recovery_target_options structure */ - recovery_target_options = parseRecoveryTargetOptions(target_time, target_xid, - target_inclusive, target_tli, target_lsn, target_immediate, - target_name, target_action, restore_no_validate); + time_t expire_time = 0; + + if (expire_time_string && ttl >= 0) + elog(ERROR, "You cannot specify '--expire-time' and '--ttl' options together"); + + /* Parse string to seconds */ + if (expire_time_string) + { + if (!parse_time(expire_time_string, &expire_time, false)) + elog(ERROR, "Invalid value for '--expire-time' option: '%s'", + expire_time_string); + } + + if (expire_time > 0 || ttl >= 0 || backup_note) + { + set_backup_params = pgut_new(pgSetBackupParams); + set_backup_params->ttl = ttl; + set_backup_params->expire_time = expire_time; + set_backup_params->note = backup_note; + + if (backup_note && strlen(backup_note) > MAX_NOTE_SIZE) + elog(ERROR, "Backup note cannot exceed %u bytes", MAX_NOTE_SIZE); + } } + /* sanity */ + if (backup_subcmd == VALIDATE_CMD && restore_params->no_validate) + elog(ERROR, "You cannot specify \"--no-validate\" option with the \"%s\" command", + command_name); + if (num_threads < 1) num_threads = 1; + if (batch_size < 1) + batch_size = 1; + compress_init(); /* do actual operation */ switch (backup_subcmd) { case ARCHIVE_PUSH_CMD: - return do_archive_push(wal_file_path, wal_file_name, file_overwrite); + do_archive_push(&instance_config, wal_file_path, wal_file_name, + batch_size, file_overwrite, no_sync, no_ready_rename); + break; case ARCHIVE_GET_CMD: - return do_archive_get(wal_file_path, wal_file_name); + do_archive_get(&instance_config, prefetch_dir, + wal_file_path, wal_file_name, batch_size, !no_validate_wal); + break; case ADD_INSTANCE_CMD: - return do_add_instance(); + return do_add_instance(&instance_config); case DELETE_INSTANCE_CMD: return 
do_delete_instance(); case INIT_CMD: return do_init(); case BACKUP_CMD: { - const char *backup_mode; - time_t start_time; + time_t start_time = time(NULL); - start_time = time(NULL); - backup_mode = deparse_backup_mode(current.backup_mode); current.stream = stream_wal; - elog(INFO, "Backup start, pg_probackup version: %s, backup ID: %s, backup mode: %s, instance: %s, stream: %s, remote: %s", - PROGRAM_VERSION, base36enc(start_time), backup_mode, instance_name, - stream_wal ? "true" : "false", is_remote_backup ? "true" : "false"); + /* sanity */ + if (current.backup_mode == BACKUP_MODE_INVALID) + elog(ERROR, "required parameter not specified: BACKUP_MODE " + "(-b, --backup-mode)"); - return do_backup(start_time); + return do_backup(start_time, set_backup_params, no_validate, no_sync, backup_logs); } case RESTORE_CMD: return do_restore_or_validate(current.backup_id, - recovery_target_options, - true); + recovery_target_options, + restore_params, no_sync); case VALIDATE_CMD: - if (current.backup_id == 0 && target_time == 0 && target_xid == 0) + if (current.backup_id == 0 && target_time == 0 && target_xid == 0 && !target_lsn) + { + /* sanity */ + if (datname_exclude_list || datname_include_list) + elog(ERROR, "You must specify parameter (-i, --backup-id) for partial validation"); + return do_validate_all(); + } else + /* PITR validation and, optionally, partial validation */ return do_restore_or_validate(current.backup_id, recovery_target_options, - false); + restore_params, + no_sync); case SHOW_CMD: - return do_show(current.backup_id); + return do_show(instance_name, current.backup_id, show_archive); case DELETE_CMD: if (delete_expired && backup_id_string) - elog(ERROR, "You cannot specify --delete-expired and --backup-id options together"); - if (!delete_expired && !delete_wal && !backup_id_string) - elog(ERROR, "You must specify at least one of the delete options: --expired |--wal |--backup_id"); - if (delete_wal && !delete_expired && !backup_id_string) - return do_retention_purge(); - if (delete_expired) - return do_retention_purge(); + elog(ERROR, "You cannot specify --delete-expired and (-i, --backup-id) options together"); + if (merge_expired && backup_id_string) + elog(ERROR, "You cannot specify --merge-expired and (-i, --backup-id) options together"); + if (delete_status && backup_id_string) + elog(ERROR, "You cannot specify --status and (-i, --backup-id) options together"); + if (!delete_expired && !merge_expired && !delete_wal && delete_status == NULL && !backup_id_string) + elog(ERROR, "You must specify at least one of the delete options: " + "--delete-expired |--delete-wal |--merge-expired |--status |(-i, --backup-id)"); + if (!backup_id_string) + { + if (delete_status) + do_delete_status(&instance_config, delete_status); + else + do_retention(); + } else - return do_delete(current.backup_id); + do_delete(current.backup_id); + break; case MERGE_CMD: do_merge(current.backup_id); break; case SHOW_CONFIG_CMD: - return do_configure(true); + do_show_config(); + break; case SET_CONFIG_CMD: - return do_configure(false); + do_set_config(false); + break; + case SET_BACKUP_CMD: + if (!backup_id_string) + elog(ERROR, "You must specify parameter (-i, --backup-id) for 'set-backup' command"); + do_set_backup(instance_name, current.backup_id, set_backup_params); + break; + case CHECKDB_CMD: + do_checkdb(need_amcheck, + instance_config.conn_opt, instance_config.pgdata); + break; case NO_CMD: /* Should not happen */ elog(ERROR, "Unknown subcommand"); @@ -522,25 +879,36 @@ main(int argc, char 
*argv[]) } static void -opt_backup_mode(pgut_option *opt, const char *arg) +opt_incr_restore_mode(ConfigOption *opt, const char *arg) { - current.backup_mode = parse_backup_mode(arg); -} + if (pg_strcasecmp(arg, "none") == 0) + { + incremental_mode = INCR_NONE; + return; + } + else if (pg_strcasecmp(arg, "checksum") == 0) + { + incremental_mode = INCR_CHECKSUM; + return; + } + else if (pg_strcasecmp(arg, "lsn") == 0) + { + incremental_mode = INCR_LSN; + return; + } -static void -opt_log_level_console(pgut_option *opt, const char *arg) -{ - log_level_console = parse_log_level(arg); + /* Backup mode is invalid, so leave with an error */ + elog(ERROR, "Invalid value for '--incremental-mode' option: '%s'", arg); } static void -opt_log_level_file(pgut_option *opt, const char *arg) +opt_backup_mode(ConfigOption *opt, const char *arg) { - log_level_file = parse_log_level(arg); + current.backup_mode = parse_backup_mode(arg); } static void -opt_show_format(pgut_option *opt, const char *arg) +opt_show_format(ConfigOption *opt, const char *arg) { const char *v = arg; size_t len; @@ -563,12 +931,6 @@ opt_show_format(pgut_option *opt, const char *arg) elog(ERROR, "Invalid show format \"%s\"", arg); } -static void -opt_compress_alg(pgut_option *opt, const char *arg) -{ - compress_alg = parse_compress_alg(arg); -} - /* * Initialize compress and sanity checks for compress. */ @@ -577,29 +939,67 @@ compress_init(void) { /* Default algorithm is zlib */ if (compress_shortcut) - compress_alg = ZLIB_COMPRESS; + instance_config.compress_alg = ZLIB_COMPRESS; if (backup_subcmd != SET_CONFIG_CMD) { - if (compress_level != COMPRESS_LEVEL_DEFAULT - && compress_alg == NOT_DEFINED_COMPRESS) - elog(ERROR, "Cannot specify compress-level option without compress-alg option"); + if (instance_config.compress_level != COMPRESS_LEVEL_DEFAULT + && instance_config.compress_alg == NOT_DEFINED_COMPRESS) + elog(ERROR, "Cannot specify compress-level option alone without " + "compress-algorithm option"); } - if (compress_level < 0 || compress_level > 9) + if (instance_config.compress_level < 0 || instance_config.compress_level > 9) elog(ERROR, "--compress-level value must be in the range from 0 to 9"); - if (compress_level == 0) - compress_alg = NOT_DEFINED_COMPRESS; + if (instance_config.compress_alg == ZLIB_COMPRESS && instance_config.compress_level == 0) + elog(WARNING, "Compression level 0 will lead to data bloat!"); if (backup_subcmd == BACKUP_CMD || backup_subcmd == ARCHIVE_PUSH_CMD) { #ifndef HAVE_LIBZ - if (compress_alg == ZLIB_COMPRESS) + if (instance_config.compress_alg == ZLIB_COMPRESS) elog(ERROR, "This build does not support zlib compression"); else #endif - if (compress_alg == PGLZ_COMPRESS && num_threads > 1) + if (instance_config.compress_alg == PGLZ_COMPRESS && num_threads > 1) elog(ERROR, "Multithread backup does not support pglz compression"); } } + +/* Construct array of datnames, provided by user via db-exclude option */ +void +opt_datname_exclude_list(ConfigOption *opt, const char *arg) +{ + char *dbname = NULL; + + if (!datname_exclude_list) + datname_exclude_list = parray_new(); + + dbname = pgut_malloc(strlen(arg) + 1); + + /* TODO add sanity for database name */ + strcpy(dbname, arg); + + parray_append(datname_exclude_list, dbname); +} + +/* Construct array of datnames, provided by user via db-include option */ +void +opt_datname_include_list(ConfigOption *opt, const char *arg) +{ + char *dbname = NULL; + + if (!datname_include_list) + datname_include_list = parray_new(); + + dbname = 
pgut_malloc(strlen(arg) + 1); + + if (strcmp(dbname, "tempate0") == 0 || + strcmp(dbname, "tempate1") == 0) + elog(ERROR, "Databases 'template0' and 'template1' cannot be used for partial restore or validation"); + + strcpy(dbname, arg); + + parray_append(datname_include_list, dbname); +} diff --git a/src/pg_probackup.h b/src/pg_probackup.h index 9da22ad64..b995be062 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -3,7 +3,7 @@ * pg_probackup.h: Backup/Recovery manager for PostgreSQL. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -11,61 +11,75 @@ #define PG_PROBACKUP_H #include "postgres_fe.h" +#include "libpq-fe.h" +#include "libpq-int.h" -#include -#include - -#include "access/timeline.h" -#include "access/xlogdefs.h" #include "access/xlog_internal.h" -#include "catalog/pg_control.h" -#include "storage/block.h" -#include "storage/bufpage.h" -#include "storage/checksum.h" #include "utils/pg_crc.h" -#include "common/relpath.h" -#include "port.h" + +#if PG_VERSION_NUM >= 120000 +#include "common/logging.h" +#endif #ifdef FRONTEND #undef FRONTEND - #include "port/atomics.h" +#include #define FRONTEND +#else +#include #endif +#include "utils/configuration.h" +#include "utils/logger.h" +#include "utils/remote.h" #include "utils/parray.h" #include "utils/pgut.h" +#include "utils/file.h" #include "datapagemap.h" +#include "utils/thread.h" -# define PG_STOP_BACKUP_TIMEOUT 300 -/* - * Macro needed to parse ptrack. - * NOTE Keep those values syncronised with definitions in ptrack.h - */ -#define PTRACK_BITS_PER_HEAPBLOCK 1 -#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / PTRACK_BITS_PER_HEAPBLOCK) +#ifdef WIN32 +#define __thread __declspec(thread) +#else +#include +#endif + +/* pgut client variables and full path */ +extern const char *PROGRAM_NAME; +extern const char *PROGRAM_NAME_FULL; +extern const char *PROGRAM_FULL_PATH; +extern const char *PROGRAM_URL; +extern const char *PROGRAM_EMAIL; /* Directory/File names */ #define DATABASE_DIR "database" #define BACKUPS_DIR "backups" #if PG_VERSION_NUM >= 100000 #define PG_XLOG_DIR "pg_wal" +#define PG_LOG_DIR "log" #else #define PG_XLOG_DIR "pg_xlog" +#define PG_LOG_DIR "pg_log" #endif #define PG_TBLSPC_DIR "pg_tblspc" #define PG_GLOBAL_DIR "global" #define BACKUP_CONTROL_FILE "backup.control" #define BACKUP_CATALOG_CONF_FILE "pg_probackup.conf" -#define BACKUP_CATALOG_PID "pg_probackup.pid" +#define BACKUP_CATALOG_PID "backup.pid" #define DATABASE_FILE_LIST "backup_content.control" #define PG_BACKUP_LABEL_FILE "backup_label" -#define PG_BLACK_LIST "black_list" #define PG_TABLESPACE_MAP_FILE "tablespace_map" +#define EXTERNAL_DIR "external_directories/externaldir" +#define DATABASE_MAP "database_map" +#define HEADER_MAP "page_header_map" +#define HEADER_MAP_TMP "page_header_map_tmp" + +/* Timeout defaults */ +#define ARCHIVE_TIMEOUT_DEFAULT 300 +#define REPLICA_TIMEOUT_DEFAULT 300 -#define LOG_FILENAME_DEFAULT "pg_probackup.log" -#define LOG_DIRECTORY_DEFAULT "log" -/* Direcotry/File permission */ +/* Directory/File permission */ #define DIR_PERMISSION (0700) #define FILE_PERMISSION (0600) @@ -74,6 +88,62 @@ #define XID_FMT "%u" #endif +#ifndef STDIN_FILENO +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#endif + +/* stdio buffer size */ +#define STDIO_BUFSIZE 65536 + +#define ERRMSG_MAX_LEN 2048 +#define 
CHUNK_SIZE (128 * 1024) +#define LARGE_CHUNK_SIZE (4 * 1024 * 1024) +#define OUT_BUF_SIZE (512 * 1024) + +/* retry attempts */ +#define PAGE_READ_ATTEMPTS 300 + +/* max size of note, that can be added to backup */ +#define MAX_NOTE_SIZE 1024 + +/* Check if an XLogRecPtr value is pointed to 0 offset */ +#define XRecOffIsNull(xlrp) \ + ((xlrp) % XLOG_BLCKSZ == 0) + +typedef struct RedoParams +{ + TimeLineID tli; + XLogRecPtr lsn; + uint32 checksum_version; +} RedoParams; + +typedef struct PageState +{ + uint16 checksum; + XLogRecPtr lsn; +} PageState; + +typedef struct db_map_entry +{ + Oid dbOid; + char *datname; +} db_map_entry; + +typedef enum IncrRestoreMode +{ + INCR_NONE, + INCR_CHECKSUM, + INCR_LSN +} IncrRestoreMode; + +typedef enum PartialRestoreType +{ + NONE, + INCLUDE, + EXCLUDE, +} PartialRestoreType; + typedef enum CompressAlg { NOT_DEFINED_COMPRESS = 0, @@ -82,38 +152,89 @@ typedef enum CompressAlg ZLIB_COMPRESS, } CompressAlg; +typedef enum ForkName +{ + vm, + fsm, + cfm, + init, + ptrack +} ForkName; + +#define INIT_FILE_CRC32(use_crc32c, crc) \ +do { \ + if (use_crc32c) \ + INIT_CRC32C(crc); \ + else \ + INIT_TRADITIONAL_CRC32(crc); \ +} while (0) +#define COMP_FILE_CRC32(use_crc32c, crc, data, len) \ +do { \ + if (use_crc32c) \ + COMP_CRC32C((crc), (data), (len)); \ + else \ + COMP_TRADITIONAL_CRC32(crc, data, len); \ +} while (0) +#define FIN_FILE_CRC32(use_crc32c, crc) \ +do { \ + if (use_crc32c) \ + FIN_CRC32C(crc); \ + else \ + FIN_TRADITIONAL_CRC32(crc); \ +} while (0) + + /* Information about single file (or dir) in backup */ typedef struct pgFile { - char *name; /* file or directory name */ + char *name; /* file or directory name */ mode_t mode; /* protection (file type and permission) */ size_t size; /* size of the file */ + time_t mtime; /* file st_mtime attribute, can be used only + during backup */ size_t read_size; /* size of the portion read (if only some pages are backed up, it's different from size) */ int64 write_size; /* size of the backed-up file. BYTES_INVALID means that the file existed but was not backed up because not modified since last backup. */ + size_t uncompressed_size; /* size of the backed-up file before compression + * and adding block headers. 
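A minimal sketch of how the three CRC wrappers above fit together (illustrative only; buffer_crc is a hypothetical helper, not something this patch adds). The real entry points are pgFileGetCRC()/pgFileGetCRCgz(), declared further down with the same use_crc32c switch, presumably so that file lists written with the legacy CRC-32 stay verifiable:

    /* Hypothetical helper: checksum a buffer with either CRC flavour. */
    static pg_crc32
    buffer_crc(bool use_crc32c, const char *buf, size_t len)
    {
        pg_crc32 crc;

        INIT_FILE_CRC32(use_crc32c, crc);           /* choose CRC-32C or traditional CRC-32 */
        COMP_FILE_CRC32(use_crc32c, crc, buf, len); /* accumulate over the buffer */
        FIN_FILE_CRC32(use_crc32c, crc);            /* finalize the value */

        return crc;
    }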
+ */ /* we need int64 here to store '-1' value */ pg_crc32 crc; /* CRC value of the file, regular file only */ - char *linked; /* path of the linked file */ + char *rel_path; /* relative path of the file */ + char *linked; /* path of the linked file */ bool is_datafile; /* true if the file is PostgreSQL data file */ - char *path; /* absolute path of the file */ Oid tblspcOid; /* tblspcOid extracted from path, if applicable */ Oid dbOid; /* dbOid extracted from path, if applicable */ Oid relOid; /* relOid extracted from path, if applicable */ - char *forkName; /* forkName extracted from path, if applicable */ + ForkName forkName; /* forkName extracted from path, if applicable */ int segno; /* Segment number for ptrack */ - int n_blocks; /* size of the file in blocks, readed during DELTA backup */ + int n_blocks; /* number of blocks in the data file in data directory */ bool is_cfs; /* Flag to distinguish files compressed by CFS*/ - bool is_database; - bool exists_in_prev; /* Mark files, both data and regular, that exists in previous backup */ - CompressAlg compress_alg; /* compression algorithm applied to the file */ - volatile pg_atomic_flag lock; /* lock for synchronization of parallel threads */ - datapagemap_t pagemap; /* bitmap of pages updated since previous backup */ - bool pagemap_isabsent; /* Used to mark files with unknown state of pagemap, - * i.e. datafiles without _ptrack */ + bool is_database; /* Flag used strictly by ptrack 1.x backup */ + int external_dir_num; /* Number of external directory. 0 if not external */ + bool exists_in_prev; /* Mark files, both data and regular, that exists in previous backup */ + CompressAlg compress_alg; /* compression algorithm applied to the file */ + volatile pg_atomic_flag lock;/* lock for synchronization of parallel threads */ + datapagemap_t pagemap; /* bitmap of pages updated since previous backup + may take up to 16kB per file */ + bool pagemap_isabsent; /* Used to mark files with unknown state of pagemap, + * i.e. 
datafiles without _ptrack */ + /* Coordinates in header map */ + int n_headers; /* number of blocks in the data file in backup */ + pg_crc32 hdr_crc; /* CRC value of header file: name_hdr */ + off_t hdr_off; /* offset in header map */ + int hdr_size; /* offset in header map */ } pgFile; +typedef struct page_map_entry +{ + const char *path; /* file or directory name */ + char *pagemap; + size_t pagemapsize; +} page_map_entry; + /* Special values of datapagemap_t bitmapsize */ #define PageBitmapIsEmpty 0 /* Used to mark unchanged datafiles */ @@ -125,6 +246,8 @@ typedef enum BackupStatus BACKUP_STATUS_ERROR, /* aborted because of unexpected error */ BACKUP_STATUS_RUNNING, /* running backup */ BACKUP_STATUS_MERGING, /* merging backups */ + BACKUP_STATUS_MERGED, /* backup has been successfully merged and now awaits + * the assignment of new start_time */ BACKUP_STATUS_DELETING, /* data files are being deleted */ BACKUP_STATUS_DELETED, /* data files have been deleted */ BACKUP_STATUS_DONE, /* completed but not validated yet */ @@ -141,24 +264,6 @@ typedef enum BackupMode BACKUP_MODE_FULL /* full backup */ } BackupMode; -typedef enum ProbackupSubcmd -{ - NO_CMD = 0, - INIT_CMD, - ADD_INSTANCE_CMD, - DELETE_INSTANCE_CMD, - ARCHIVE_PUSH_CMD, - ARCHIVE_GET_CMD, - BACKUP_CMD, - RESTORE_CMD, - VALIDATE_CMD, - DELETE_CMD, - MERGE_CMD, - SHOW_CMD, - SET_CONFIG_CMD, - SHOW_CONFIG_CMD -} ProbackupSubcmd; - typedef enum ShowFormat { SHOW_PLAIN, @@ -168,60 +273,138 @@ typedef enum ShowFormat /* special values of pgBackup fields */ #define INVALID_BACKUP_ID 0 /* backup ID is not provided by user */ -#define BYTES_INVALID (-1) +#define BYTES_INVALID (-1) /* file didn`t changed since previous backup, DELTA backup do not rely on it */ +#define FILE_NOT_FOUND (-2) /* file disappeared during backup */ #define BLOCKNUM_INVALID (-1) +#define PROGRAM_VERSION "2.4.2" +#define AGENT_PROTOCOL_VERSION 20402 + -typedef struct pgBackupConfig +typedef struct ConnectionOptions { + const char *pgdatabase; + const char *pghost; + const char *pgport; + const char *pguser; +} ConnectionOptions; + +typedef struct ConnectionArgs +{ + PGconn *conn; + PGcancel *cancel_conn; +} ConnectionArgs; + +/* Store values for --remote-* option for 'restore_command' constructor */ +typedef struct ArchiveOptions +{ + const char *host; + const char *port; + const char *user; +} ArchiveOptions; + +/* + * An instance configuration. It can be stored in a configuration file or passed + * from command line. + */ +typedef struct InstanceConfig +{ + char *name; + char arclog_path[MAXPGPATH]; + char backup_instance_path[MAXPGPATH]; + uint64 system_identifier; - char *pgdata; - const char *pgdatabase; - const char *pghost; - const char *pgport; - const char *pguser; - - const char *master_host; - const char *master_port; - const char *master_db; - const char *master_user; - int replica_timeout; - - int archive_timeout; - - int log_level_console; - int log_level_file; - char *log_filename; - char *error_log_filename; - char *log_directory; - int log_rotation_size; - int log_rotation_age; + uint32 xlog_seg_size; + + char *pgdata; + char *external_dir_str; + + ConnectionOptions conn_opt; + ConnectionOptions master_conn_opt; + + uint32 replica_timeout; + + /* Wait timeout for WAL segment archiving */ + uint32 archive_timeout; + /* cmdline to be used as restore_command */ + char *restore_command; + + /* Logger parameters */ + LoggerConfig logger; + + /* Remote access parameters */ + RemoteConfig remote; + + /* Retention options. 0 disables the option. 
*/ uint32 retention_redundancy; uint32 retention_window; + uint32 wal_depth; CompressAlg compress_alg; int compress_level; -} pgBackupConfig; + + /* Archive description */ + ArchiveOptions archive; +} InstanceConfig; + +extern ConfigOption instance_options[]; +extern InstanceConfig instance_config; +extern time_t current_time; + +typedef struct PGNodeInfo +{ + uint32 block_size; + uint32 wal_block_size; + uint32 checksum_version; + bool is_superuser; + bool pgpro_support; + + int server_version; + char server_version_str[100]; + + int ptrack_version_num; + bool is_ptrack_enable; + const char *ptrack_schema; /* used only for ptrack 2.x */ + +} PGNodeInfo; + +/* structure used for access to block header map */ +typedef struct HeaderMap +{ + char path[MAXPGPATH]; + char path_tmp[MAXPGPATH]; /* used only in merge */ + FILE *fp; /* used only for writing */ + char *buf; /* buffer */ + off_t offset; /* current position in fp */ + pthread_mutex_t mutex; + +} HeaderMap; typedef struct pgBackup pgBackup; /* Information about single backup stored in backup.conf */ -typedef struct pgBackup +struct pgBackup { BackupMode backup_mode; /* Mode - one of BACKUP_MODE_xxx above*/ time_t backup_id; /* Identifier of the backup. * Currently it's the same as start_time */ BackupStatus status; /* Status - one of BACKUP_STATUS_xxx above*/ - TimeLineID tli; /* timeline of start and stop baskup lsns */ + TimeLineID tli; /* timeline of start and stop backup lsns */ XLogRecPtr start_lsn; /* backup's starting transaction log location */ XLogRecPtr stop_lsn; /* backup's finishing transaction log location */ time_t start_time; /* since this moment backup has status * BACKUP_STATUS_RUNNING */ + time_t merge_dest_backup; /* start_time of incremental backup, + * this backup is merging with. + * Only available for FULL backups + * with MERGING or MERGED statuses */ + time_t merge_time; /* the moment when merge was started or 0 */ time_t end_time; /* the moment when backup was finished, or the moment * when we realized that backup is broken */ time_t recovery_time; /* Earliest moment for which you can restore * the state of the database cluster using * this backup */ + time_t expire_time; /* Backup expiration date */ TransactionId recovery_xid; /* Earliest xid for which you can restore * the state of the database cluster using * this backup */ @@ -232,8 +415,17 @@ typedef struct pgBackup * BYTES_INVALID means nothing was backed up. */ int64 data_bytes; - /* Size of WAL files in archive needed to restore this backup */ + /* Size of WAL files needed to replay on top of this + * backup to reach the consistency. + */ int64 wal_bytes; + /* Size of data files before applying compression and block header, + * WAL files are not included. + */ + int64 uncompressed_bytes; + + /* Size of data files in PGDATA at the moment of backup. 
*/ + int64 pgdata_bytes; CompressAlg compress_alg; int compress_level; @@ -242,7 +434,6 @@ typedef struct pgBackup uint32 block_size; uint32 wal_block_size; uint32 checksum_version; - char program_version[100]; char server_version[100]; @@ -255,50 +446,93 @@ typedef struct pgBackup pgBackup *parent_backup_link; char *primary_conninfo; /* Connection parameters of the backup * in the format suitable for recovery.conf */ -} pgBackup; + char *external_dir_str; /* List of external directories, + * separated by ':' */ + char *root_dir; /* Full path for root backup directory: + backup_path/instance_name/backup_id */ + char *database_dir; /* Full path to directory with data files: + backup_path/instance_name/backup_id/database */ + parray *files; /* list of files belonging to this backup + * must be populated explicitly */ + char *note; + + pg_crc32 content_crc; + + /* map used for access to page headers */ + HeaderMap hdr_map; +}; /* Recovery target for restore and validate subcommands */ typedef struct pgRecoveryTarget { - bool time_specified; - time_t recovery_target_time; - /* add one more field in order to avoid deparsing recovery_target_time back */ - const char *target_time_string; - bool xid_specified; - TransactionId recovery_target_xid; - /* add one more field in order to avoid deparsing recovery_target_xid back */ - const char *target_xid_string; - bool lsn_specified; - XLogRecPtr recovery_target_lsn; - /* add one more field in order to avoid deparsing recovery_target_lsn back */ - const char *target_lsn_string; - TimeLineID recovery_target_tli; - bool recovery_target_inclusive; + time_t target_time; + /* add one more field in order to avoid deparsing target_time back */ + const char *time_string; + TransactionId target_xid; + /* add one more field in order to avoid deparsing target_xid back */ + const char *xid_string; + XLogRecPtr target_lsn; + /* add one more field in order to avoid deparsing target_lsn back */ + const char *lsn_string; + TimeLineID target_tli; + bool target_inclusive; bool inclusive_specified; - bool recovery_target_immediate; - const char *recovery_target_name; - const char *recovery_target_action; - bool restore_no_validate; + const char *target_stop; + const char *target_name; + const char *target_action; } pgRecoveryTarget; -/* Union to ease operations on relation pages */ -typedef union DataPage +/* Options needed for restore and validate commands */ +typedef struct pgRestoreParams +{ + bool force; + bool is_restore; + bool no_validate; + bool restore_as_replica; + bool skip_external_dirs; + bool skip_block_validation; //Start using it + const char *restore_command; + const char *primary_slot_name; + const char *primary_conninfo; + + /* options for incremental restore */ + IncrRestoreMode incremental_mode; + XLogRecPtr shift_lsn; + + /* options for partial restore */ + PartialRestoreType partial_restore_type; + parray *partial_db_list; +} pgRestoreParams; + +/* Options needed for set-backup command */ +typedef struct pgSetBackupParams { - PageHeaderData page_data; - char data[BLCKSZ]; -} DataPage; + int64 ttl; /* amount of time backup must be pinned + * -1 - do nothing + * 0 - disable pinning + */ + time_t expire_time; /* Point in time until backup + * must be pinned. 
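For orientation, a hypothetical call site for pgSetBackupParams; this mirrors what main() in this patch builds from --ttl, --expire-time and --note, but the pin_for_30_days wrapper itself is made up for illustration:

    static void
    pin_for_30_days(const char *instance_name, time_t backup_id)
    {
        pgSetBackupParams *params = pgut_new(pgSetBackupParams);

        params->ttl = (int64) 30 * 24 * 3600;  /* seconds; 0 would drop the pin, -1 leaves pinning untouched */
        params->expire_time = 0;               /* --ttl and --expire-time are mutually exclusive on the CLI */
        params->note = NULL;

        do_set_backup(instance_name, backup_id, params);
    }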
+ */ + char *note; +} pgSetBackupParams; typedef struct { + PGNodeInfo *nodeInfo; + const char *from_root; const char *to_root; + const char *external_prefix; parray *files_list; parray *prev_filelist; + parray *external_dirs; XLogRecPtr prev_start_lsn; - PGconn *backup_conn; - PGcancel *cancel_conn; + ConnectionArgs conn_arg; + int thread_num; + HeaderMap *hdr_map; /* * Return value from the thread. @@ -307,9 +541,97 @@ typedef struct int ret; } backup_files_arg; + +typedef struct timelineInfo timelineInfo; + +/* struct to collect info about timelines in WAL archive */ +struct timelineInfo { + + TimeLineID tli; /* this timeline */ + TimeLineID parent_tli; /* parent timeline. 0 if none */ + timelineInfo *parent_link; /* link to parent timeline */ + XLogRecPtr switchpoint; /* if this timeline has a parent, then + * switchpoint contains switchpoint LSN, + * otherwise 0 */ + XLogSegNo begin_segno; /* first present segment in this timeline */ + XLogSegNo end_segno; /* last present segment in this timeline */ + size_t n_xlog_files; /* number of segments (only really existing) + * does not include lost segments */ + size_t size; /* space on disk taken by regular WAL files */ + parray *backups; /* array of pgBackup sturctures with info + * about backups belonging to this timeline */ + parray *xlog_filelist; /* array of ordinary WAL segments, '.partial' + * and '.backup' files belonging to this timeline */ + parray *lost_segments; /* array of intervals of lost segments */ + parray *keep_segments; /* array of intervals of segments used by WAL retention */ + pgBackup *closest_backup; /* link to valid backup, closest to timeline */ + pgBackup *oldest_backup; /* link to oldest backup on timeline */ + XLogRecPtr anchor_lsn; /* LSN belonging to the oldest segno to keep for 'wal-depth' */ + TimeLineID anchor_tli; /* timeline of anchor_lsn */ +}; + +typedef struct xlogInterval +{ + XLogSegNo begin_segno; + XLogSegNo end_segno; +} xlogInterval; + +typedef struct lsnInterval +{ + TimeLineID tli; + XLogRecPtr begin_lsn; + XLogRecPtr end_lsn; +} lsnInterval; + +typedef enum xlogFileType +{ + SEGMENT, + TEMP_SEGMENT, + PARTIAL_SEGMENT, + BACKUP_HISTORY_FILE +} xlogFileType; + +typedef struct xlogFile +{ + pgFile file; + XLogSegNo segno; + xlogFileType type; + bool keep; /* Used to prevent removal of WAL segments + * required by ARCHIVE backups. */ +} xlogFile; + + +/* + * When copying datafiles to backup we validate and compress them block + * by block. Thus special header is required for each data block. + */ +typedef struct BackupPageHeader +{ + BlockNumber block; /* block number */ + int32 compressed_size; +} BackupPageHeader; + +/* 4MB for 1GB file */ +typedef struct BackupPageHeader2 +{ + XLogRecPtr lsn; + int32 block; /* block number */ + int32 pos; /* position in backup file */ + uint16 checksum; +} BackupPageHeader2; + +/* Special value for compressed_size field */ +#define PageIsOk 0 +#define SkipCurrentPage -1 +#define PageIsTruncated -2 +#define PageIsCorrupted -3 /* used by checkdb */ + + /* * return pointer that exceeds the length of prefix from character string. * ex. str="/xxx/yyy/zzz", prefix="/xxx/yyy", return="zzz". + * + * Deprecated. Do not use this in new code. */ #define GetRelativePath(str, prefix) \ ((strlen(str) <= strlen(prefix)) ? 
"" : str + strlen(prefix) + 1) @@ -326,136 +648,194 @@ typedef struct strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ strcmp((fname) + XLOG_FNAME_LEN, ".gz") == 0) +#if PG_VERSION_NUM >= 110000 +#define GetXLogSegNo(xlrp, logSegNo, wal_segsz_bytes) \ + XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes) +#define GetXLogRecPtr(segno, offset, wal_segsz_bytes, dest) \ + XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest) +#define GetXLogFileName(fname, tli, logSegNo, wal_segsz_bytes) \ + XLogFileName(fname, tli, logSegNo, wal_segsz_bytes) +#define IsInXLogSeg(xlrp, logSegNo, wal_segsz_bytes) \ + XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes) +#define GetXLogSegName(fname, logSegNo, wal_segsz_bytes) \ + snprintf(fname, 20, "%08X%08X", \ + (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes))) + +#define GetXLogSegNoFromScrath(logSegNo, log, seg, wal_segsz_bytes) \ + logSegNo = (uint64) log * XLogSegmentsPerXLogId(wal_segsz_bytes) + seg + +#define GetXLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \ + XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) +#else +#define GetXLogSegNo(xlrp, logSegNo, wal_segsz_bytes) \ + XLByteToSeg(xlrp, logSegNo) +#define GetXLogRecPtr(segno, offset, wal_segsz_bytes, dest) \ + XLogSegNoOffsetToRecPtr(segno, offset, dest) +#define GetXLogFileName(fname, tli, logSegNo, wal_segsz_bytes) \ + XLogFileName(fname, tli, logSegNo) +#define IsInXLogSeg(xlrp, logSegNo, wal_segsz_bytes) \ + XLByteInSeg(xlrp, logSegNo) +#define GetXLogSegName(fname, logSegNo, wal_segsz_bytes) \ + snprintf(fname, 20, "%08X%08X",\ + (uint32) ((logSegNo) / XLogSegmentsPerXLogId), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId)) + +#define GetXLogSegNoFromScrath(logSegNo, log, seg, wal_segsz_bytes) \ + logSegNo = (uint64) log * XLogSegmentsPerXLogId + seg + +#define GetXLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \ + XLogFromFileName(fname, tli, logSegNo) +#endif + +#define IsPartialCompressXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz.partial") && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + XLOG_FNAME_LEN, ".gz.partial") == 0) + +#define IsTempXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN + strlen(".part") && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + XLOG_FNAME_LEN, ".part") == 0) + +#define IsTempCompressXLogFileName(fname) \ + (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz.part") && \ + strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ + strcmp((fname) + XLOG_FNAME_LEN, ".gz.part") == 0) + +#define IsSshProtocol() (instance_config.remote.host && strcmp(instance_config.remote.proto, "ssh") == 0) + /* directory options */ extern char *backup_path; extern char backup_instance_path[MAXPGPATH]; -extern char *pgdata; extern char arclog_path[MAXPGPATH]; /* common options */ +extern pid_t my_pid; +extern __thread int my_thread_num; extern int num_threads; extern bool stream_wal; extern bool progress; +extern bool is_archive_cmd; /* true for archive-{get,push} */ #if PG_VERSION_NUM >= 100000 /* In pre-10 'replication_slot' is defined in receivelog.h */ extern char *replication_slot; #endif +extern bool temp_slot; /* backup options */ extern bool smooth_checkpoint; -#define ARCHIVE_TIMEOUT_DEFAULT 300 -extern uint32 archive_timeout; -extern bool is_remote_backup; -extern const char *master_db; -extern const char *master_host; -extern const char *master_port; -extern const char 
*master_user; -#define REPLICA_TIMEOUT_DEFAULT 300 -extern uint32 replica_timeout; - -extern bool is_ptrack_support; -extern bool is_checksum_enabled; -extern bool exclusive_backup; -/* restore options */ -extern bool restore_as_replica; +/* remote probackup options */ +extern char* remote_agent; + +extern bool exclusive_backup; /* delete options */ extern bool delete_wal; extern bool delete_expired; -extern bool apply_to_all; -extern bool force_delete; - -/* retention options. 0 disables the option */ -#define RETENTION_REDUNDANCY_DEFAULT 0 -#define RETENTION_WINDOW_DEFAULT 0 - -extern uint32 retention_redundancy; -extern uint32 retention_window; +extern bool merge_expired; +extern bool dry_run; /* compression options */ -extern CompressAlg compress_alg; -extern int compress_level; extern bool compress_shortcut; -#define COMPRESS_ALG_DEFAULT NOT_DEFINED_COMPRESS -#define COMPRESS_LEVEL_DEFAULT 1 - -extern CompressAlg parse_compress_alg(const char *arg); -extern const char* deparse_compress_alg(int alg); /* other options */ extern char *instance_name; -extern uint64 system_identifier; /* show options */ extern ShowFormat show_format; +/* checkdb options */ +extern bool heapallindexed; +extern bool skip_block_validation; + /* current settings */ extern pgBackup current; -extern ProbackupSubcmd backup_subcmd; + +/* argv of the process */ +extern char** commands_args; /* in dir.c */ /* exclude directory list for $PGDATA file listing */ extern const char *pgdata_exclude_dir[]; /* in backup.c */ -extern int do_backup(time_t start_time); +extern int do_backup(time_t start_time, pgSetBackupParams *set_backup_params, + bool no_validate, bool no_sync, bool backup_logs); +extern void do_checkdb(bool need_amcheck, ConnectionOptions conn_opt, + char *pgdata); extern BackupMode parse_backup_mode(const char *value); extern const char *deparse_backup_mode(BackupMode mode); extern void process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno); -extern char *pg_ptrack_get_block(backup_files_arg *arguments, - Oid dbOid, Oid tblsOid, Oid relOid, - BlockNumber blknum, - size_t *result_size); +extern char *pg_ptrack_get_block(ConnectionArgs *arguments, + Oid dbOid, Oid tblsOid, Oid relOid, + BlockNumber blknum, size_t *result_size, + int ptrack_version_num, const char *ptrack_schema); /* in restore.c */ extern int do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, - bool is_restore); + pgRestoreParams *params, + bool no_sync); extern bool satisfy_timeline(const parray *timelines, const pgBackup *backup); extern bool satisfy_recovery_target(const pgBackup *backup, const pgRecoveryTarget *rt); -extern parray * readTimeLineHistory_probackup(TimeLineID targetTLI); extern pgRecoveryTarget *parseRecoveryTargetOptions( const char *target_time, const char *target_xid, const char *target_inclusive, TimeLineID target_tli, const char* target_lsn, - bool target_immediate, const char *target_name, - const char *target_action, bool restore_no_validate); + const char *target_stop, const char *target_name, + const char *target_action); + +extern parray *get_dbOid_exclude_list(pgBackup *backup, parray *datname_list, + PartialRestoreType partial_restore_type); + +extern parray *get_backup_filelist(pgBackup *backup, bool strict); +extern parray *read_timeline_history(const char *arclog_path, TimeLineID targetTLI, bool strict); +extern bool tliIsPartOfHistory(const parray *timelines, TimeLineID tli); /* in merge.c */ extern void do_merge(time_t backup_id); +extern void 
merge_backups(pgBackup *backup, pgBackup *next_backup); +extern void merge_chain(parray *parent_chain, + pgBackup *full_backup, pgBackup *dest_backup); + +extern parray *read_database_map(pgBackup *backup); /* in init.c */ extern int do_init(void); -extern int do_add_instance(void); +extern int do_add_instance(InstanceConfig *instance); /* in archive.c */ -extern int do_archive_push(char *wal_file_path, char *wal_file_name, - bool overwrite); -extern int do_archive_get(char *wal_file_path, char *wal_file_name); - +extern void do_archive_push(InstanceConfig *instance, char *wal_file_path, + char *wal_file_name, int batch_size, bool overwrite, + bool no_sync, bool no_ready_rename); +extern void do_archive_get(InstanceConfig *instance, const char *prefetch_dir_arg, char *wal_file_path, + char *wal_file_name, int batch_size, bool validate_wal); /* in configure.c */ -extern int do_configure(bool show_only); -extern void pgBackupConfigInit(pgBackupConfig *config); -extern void writeBackupCatalogConfig(FILE *out, pgBackupConfig *config); -extern void writeBackupCatalogConfigFile(pgBackupConfig *config); -extern pgBackupConfig* readBackupCatalogConfigFile(void); +extern void do_show_config(void); +extern void do_set_config(bool missing_ok); +extern void init_config(InstanceConfig *config, const char *instance_name); +extern InstanceConfig *readInstanceConfigFile(const char *instance_name); /* in show.c */ -extern int do_show(time_t requested_backup_id); +extern int do_show(const char *instance_name, time_t requested_backup_id, bool show_archive); /* in delete.c */ -extern int do_delete(time_t backup_id); -extern int do_retention_purge(void); +extern void do_delete(time_t backup_id); +extern void delete_backup_files(pgBackup *backup); +extern void do_retention(void); extern int do_delete_instance(void); +extern void do_delete_status(InstanceConfig *instance_config, const char *status); /* in fetch.c */ extern char *slurpFile(const char *datadir, const char *path, size_t *filesize, - bool safe); + bool safe, + fio_location location); extern char *fetchFile(PGconn *conn, const char *filename, size_t *filesize); /* in help.c */ @@ -463,129 +843,333 @@ extern void help_pg_probackup(void); extern void help_command(char *command); /* in validate.c */ -extern void pgBackupValidate(pgBackup* backup); +extern void pgBackupValidate(pgBackup* backup, pgRestoreParams *params); extern int do_validate_all(void); +extern int validate_one_page(Page page, BlockNumber absolute_blkno, + XLogRecPtr stop_lsn, PageState *page_st, + uint32 checksum_version); + +/* return codes for validate_one_page */ +/* TODO: use enum */ +#define PAGE_IS_VALID (-1) +#define PAGE_IS_NOT_FOUND (-2) +#define PAGE_IS_ZEROED (-3) +#define PAGE_HEADER_IS_INVALID (-4) +#define PAGE_CHECKSUM_MISMATCH (-5) +#define PAGE_LSN_FROM_FUTURE (-6) /* in catalog.c */ -extern pgBackup *read_backup(time_t timestamp); +extern pgBackup *read_backup(const char *root_dir); +extern void write_backup(pgBackup *backup, bool strict); +extern void write_backup_status(pgBackup *backup, BackupStatus status, + const char *instance_name, bool strict); +extern void write_backup_data_bytes(pgBackup *backup); +extern bool lock_backup(pgBackup *backup, bool strict); + extern const char *pgBackupGetBackupMode(pgBackup *backup); -extern parray *catalog_get_backup_list(time_t requested_backup_id); +extern parray *catalog_get_instance_list(void); +extern parray *catalog_get_backup_list(const char *instance_name, time_t requested_backup_id); +extern void 
catalog_lock_backup_list(parray *backup_list, int from_idx, + int to_idx, bool strict); extern pgBackup *catalog_get_last_data_backup(parray *backup_list, - TimeLineID tli); -extern void catalog_lock(void); + TimeLineID tli, + time_t current_start_time); +extern pgBackup *get_multi_timeline_parent(parray *backup_list, parray *tli_list, + TimeLineID current_tli, time_t current_start_time, + InstanceConfig *instance); +extern void timelineInfoFree(void *tliInfo); +extern parray *catalog_get_timelines(InstanceConfig *instance); +extern void do_set_backup(const char *instance_name, time_t backup_id, + pgSetBackupParams *set_backup_params); +extern void pin_backup(pgBackup *target_backup, + pgSetBackupParams *set_backup_params); +extern void add_note(pgBackup *target_backup, char *note); extern void pgBackupWriteControl(FILE *out, pgBackup *backup); -extern void pgBackupWriteBackupControlFile(pgBackup *backup); -extern void pgBackupWriteFileList(pgBackup *backup, parray *files, - const char *root); +extern void write_backup_filelist(pgBackup *backup, parray *files, + const char *root, parray *external_list, bool sync); -extern void pgBackupGetPath(const pgBackup *backup, char *path, size_t len, const char *subdir); +extern void pgBackupGetPath(const pgBackup *backup, char *path, size_t len, + const char *subdir); extern void pgBackupGetPath2(const pgBackup *backup, char *path, size_t len, const char *subdir1, const char *subdir2); +extern void pgBackupGetPathInInstance(const char *instance_name, + const pgBackup *backup, char *path, size_t len, + const char *subdir1, const char *subdir2); extern int pgBackupCreateDir(pgBackup *backup); +extern void pgNodeInit(PGNodeInfo *node); extern void pgBackupInit(pgBackup *backup); -extern void pgBackupCopy(pgBackup *dst, pgBackup *src); extern void pgBackupFree(void *backup); extern int pgBackupCompareId(const void *f1, const void *f2); extern int pgBackupCompareIdDesc(const void *f1, const void *f2); +extern int pgBackupCompareIdEqual(const void *l, const void *r); + +extern pgBackup* find_parent_full_backup(pgBackup *current_backup); +extern int scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup); +/* return codes for scan_parent_chain */ +#define ChainIsBroken 0 +#define ChainIsInvalid 1 +#define ChainIsOk 2 + +extern bool is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive); +extern bool is_prolific(parray *backup_list, pgBackup *target_backup); +extern int get_backup_index_number(parray *backup_list, pgBackup *backup); +extern void append_children(parray *backup_list, pgBackup *target_backup, parray *append_list); +extern bool launch_agent(void); +extern void launch_ssh(char* argv[]); +extern void wait_ssh(void); -extern pgBackup* find_parent_backup(pgBackup *current_backup); +#define COMPRESS_ALG_DEFAULT NOT_DEFINED_COMPRESS +#define COMPRESS_LEVEL_DEFAULT 1 + +extern CompressAlg parse_compress_alg(const char *arg); +extern const char* deparse_compress_alg(int alg); /* in dir.c */ extern void dir_list_file(parray *files, const char *root, bool exclude, - bool omit_symlink, bool add_root); -extern void create_data_directories(const char *data_dir, - const char *backup_dir, - bool extract_tablespaces); + bool follow_symlink, bool add_root, bool backup_logs, + bool skip_hidden, int external_dir_num, fio_location location); -extern void read_tablespace_map(parray *files, const char *backup_dir); -extern void opt_tablespace_map(pgut_option *opt, const char *arg); -extern void check_tablespace_mapping(pgBackup 
*backup); +extern void create_data_directories(parray *dest_files, + const char *data_dir, + const char *backup_dir, + bool extract_tablespaces, + bool incremental, + fio_location location); -extern void print_file_list(FILE *out, const parray *files, const char *root); -extern parray *dir_read_file_list(const char *root, const char *file_txt); +extern void read_tablespace_map(parray *files, const char *backup_dir); +extern void opt_tablespace_map(ConfigOption *opt, const char *arg); +extern void opt_externaldir_map(ConfigOption *opt, const char *arg); +extern void check_tablespace_mapping(pgBackup *backup, bool incremental, bool *tblspaces_are_empty); +extern void check_external_dir_mapping(pgBackup *backup, bool incremental); +extern char *get_external_remap(char *current_dir); + +extern void print_database_map(FILE *out, parray *database_list); +extern void write_database_map(pgBackup *backup, parray *database_list, + parray *backup_file_list); +extern void db_map_entry_free(void *map); + +extern void print_file_list(FILE *out, const parray *files, const char *root, + const char *external_prefix, parray *external_list); +extern parray *dir_read_file_list(const char *root, const char *external_prefix, + const char *file_txt, fio_location location, pg_crc32 expected_crc); +extern parray *make_external_directory_list(const char *colon_separated_dirs, + bool remap); +extern void free_dir_list(parray *list); +extern void makeExternalDirPathByNum(char *ret_path, const char *pattern_path, + const int dir_num); +extern bool backup_contains_external(const char *dir, parray *dirs_list); extern int dir_create_dir(const char *path, mode_t mode); -extern bool dir_is_empty(const char *path); +extern bool dir_is_empty(const char *path, fio_location location); -extern bool fileExists(const char *path); +extern bool fileExists(const char *path, fio_location location); extern size_t pgFileSize(const char *path); -extern pgFile *pgFileNew(const char *path, bool omit_symlink); -extern pgFile *pgFileInit(const char *path); -extern void pgFileDelete(pgFile *file); +extern pgFile *pgFileNew(const char *path, const char *rel_path, + bool follow_symlink, int external_dir_num, + fio_location location); +extern pgFile *pgFileInit(const char *rel_path); +extern void pgFileDelete(mode_t mode, const char *full_path); +extern void fio_pgFileDelete(pgFile *file, const char *full_path); + extern void pgFileFree(void *file); -extern pg_crc32 pgFileGetCRC(const char *file_path); -extern int pgFileComparePath(const void *f1, const void *f2); -extern int pgFileComparePathDesc(const void *f1, const void *f2); + +extern pg_crc32 pgFileGetCRC(const char *file_path, bool missing_ok, bool use_crc32c); +extern pg_crc32 pgFileGetCRCgz(const char *file_path, bool missing_ok, bool use_crc32c); + +extern int pgFileMapComparePath(const void *f1, const void *f2); +extern int pgFileCompareName(const void *f1, const void *f2); +extern int pgFileCompareRelPathWithExternal(const void *f1, const void *f2); +extern int pgFileCompareRelPathWithExternalDesc(const void *f1, const void *f2); extern int pgFileCompareLinked(const void *f1, const void *f2); extern int pgFileCompareSize(const void *f1, const void *f2); +extern int pgCompareOid(const void *f1, const void *f2); /* in data.c */ -extern bool backup_data_file(backup_files_arg* arguments, - const char *to_path, pgFile *file, - XLogRecPtr prev_backup_start_lsn, - BackupMode backup_mode, - CompressAlg calg, int clevel); -extern void restore_data_file(const char *to_path, - pgFile *file, 
bool allow_truncate, - bool write_header); -extern bool copy_file(const char *from_root, const char *to_root, pgFile *file); -extern void move_file(const char *from_root, const char *to_root, pgFile *file); -extern void push_wal_file(const char *from_path, const char *to_path, - bool is_compress, bool overwrite); -extern void get_wal_file(const char *from_path, const char *to_path); - -extern bool calc_file_checksum(pgFile *file); - +extern bool check_data_file(ConnectionArgs *arguments, pgFile *file, + const char *from_fullpath, uint32 checksum_version); + +extern void backup_data_file(ConnectionArgs* conn_arg, pgFile *file, + const char *from_fullpath, const char *to_fullpath, + XLogRecPtr prev_backup_start_lsn, BackupMode backup_mode, + CompressAlg calg, int clevel, uint32 checksum_version, + int ptrack_version_num, const char *ptrack_schema, + HeaderMap *hdr_map, bool missing_ok); +extern void backup_non_data_file(pgFile *file, pgFile *prev_file, + const char *from_fullpath, const char *to_fullpath, + BackupMode backup_mode, time_t parent_backup_time, + bool missing_ok); +extern void backup_non_data_file_internal(const char *from_fullpath, + fio_location from_location, + const char *to_fullpath, pgFile *file, + bool missing_ok); + +extern size_t restore_data_file(parray *parent_chain, pgFile *dest_file, FILE *out, + const char *to_fullpath, bool use_bitmap, PageState *checksum_map, + XLogRecPtr shift_lsn, datapagemap_t *lsn_map, bool use_headers); +extern size_t restore_data_file_internal(FILE *in, FILE *out, pgFile *file, uint32 backup_version, + const char *from_fullpath, const char *to_fullpath, int nblocks, + datapagemap_t *map, PageState *checksum_map, int checksum_version, + datapagemap_t *lsn_map, BackupPageHeader2 *headers); +extern size_t restore_non_data_file(parray *parent_chain, pgBackup *dest_backup, + pgFile *dest_file, FILE *out, const char *to_fullpath, + bool already_exists); +extern void restore_non_data_file_internal(FILE *in, FILE *out, pgFile *file, + const char *from_fullpath, const char *to_fullpath); +extern bool create_empty_file(fio_location from_location, const char *to_root, + fio_location to_location, pgFile *file); + +extern PageState *get_checksum_map(const char *fullpath, uint32 checksum_version, + int n_blocks, XLogRecPtr dest_stop_lsn, BlockNumber segmentno); +extern datapagemap_t *get_lsn_map(const char *fullpath, uint32 checksum_version, + int n_blocks, XLogRecPtr shift_lsn, BlockNumber segmentno); +extern pid_t check_postmaster(const char *pgdata); + +extern bool validate_file_pages(pgFile *file, const char *fullpath, XLogRecPtr stop_lsn, + uint32 checksum_version, uint32 backup_version, HeaderMap *hdr_map); + +extern BackupPageHeader2* get_data_file_headers(HeaderMap *hdr_map, pgFile *file, uint32 backup_version, bool strict); +extern void write_page_headers(BackupPageHeader2 *headers, pgFile *file, HeaderMap *hdr_map, bool is_merge); +extern void init_header_map(pgBackup *backup); +extern void cleanup_header_map(HeaderMap *hdr_map); /* parsexlog.c */ -extern void extractPageMap(const char *datadir, - XLogRecPtr startpoint, - TimeLineID tli, - XLogRecPtr endpoint, bool prev_seg, - parray *backup_files_list); -extern void validate_wal(pgBackup *backup, - const char *archivedir, - time_t target_time, - TransactionId target_xid, - XLogRecPtr target_lsn, - TimeLineID tli); +extern bool extractPageMap(const char *archivedir, uint32 wal_seg_size, + XLogRecPtr startpoint, TimeLineID start_tli, + XLogRecPtr endpoint, TimeLineID end_tli, + parray 
*tli_list); +extern void validate_wal(pgBackup *backup, const char *archivedir, + time_t target_time, TransactionId target_xid, + XLogRecPtr target_lsn, TimeLineID tli, + uint32 seg_size); +extern bool validate_wal_segment(TimeLineID tli, XLogSegNo segno, + const char *prefetch_dir, uint32 wal_seg_size); extern bool read_recovery_info(const char *archivedir, TimeLineID tli, + uint32 seg_size, XLogRecPtr start_lsn, XLogRecPtr stop_lsn, - time_t *recovery_time, - TransactionId *recovery_xid); + time_t *recovery_time); extern bool wal_contains_lsn(const char *archivedir, XLogRecPtr target_lsn, - TimeLineID target_tli); + TimeLineID target_tli, uint32 seg_size); +extern XLogRecPtr get_prior_record_lsn(const char *archivedir, XLogRecPtr start_lsn, + XLogRecPtr stop_lsn, TimeLineID tli, + bool seek_prev_segment, uint32 seg_size); + +extern XLogRecPtr get_first_record_lsn(const char *archivedir, XLogRecPtr start_lsn, + TimeLineID tli, uint32 wal_seg_size, int timeout); +extern XLogRecPtr get_next_record_lsn(const char *archivedir, XLogSegNo segno, TimeLineID tli, + uint32 wal_seg_size, int timeout, XLogRecPtr target); /* in util.c */ -extern TimeLineID get_current_timeline(bool safe); -extern void sanityChecks(void); +extern TimeLineID get_current_timeline(PGconn *conn); +extern TimeLineID get_current_timeline_from_control(bool safe); +extern XLogRecPtr get_checkpoint_location(PGconn *conn); +extern uint64 get_system_identifier(const char *pgdata_path); +extern uint64 get_remote_system_identifier(PGconn *conn); +extern uint32 get_data_checksum_version(bool safe); +extern pg_crc32c get_pgcontrol_checksum(const char *pgdata_path); +extern uint32 get_xlog_seg_size(char *pgdata_path); +extern void get_redo(const char *pgdata_path, RedoParams *redo); +extern void set_min_recovery_point(pgFile *file, const char *backup_path, + XLogRecPtr stop_backup_lsn); +extern void copy_pgcontrol_file(const char *from_fullpath, fio_location from_location, + const char *to_fullpath, fio_location to_location, pgFile *file); + extern void time2iso(char *buf, size_t len, time_t time); extern const char *status2str(BackupStatus status); -extern void remove_trailing_space(char *buf, int comment_mark); -extern void remove_not_digit(char *buf, size_t len, const char *str); -extern uint32 get_data_checksum_version(bool safe); +extern BackupStatus str2status(const char *status); extern const char *base36enc(long unsigned int value); extern char *base36enc_dup(long unsigned int value); extern long unsigned int base36dec(const char *text); -extern uint64 get_system_identifier(char *pgdata); -extern uint64 get_remote_system_identifier(PGconn *conn); -extern pg_time_t timestamptz_to_time_t(TimestampTz t); -extern int parse_server_version(char *server_version_str); - -/* in status.c */ -extern bool is_pg_running(void); - -#ifdef WIN32 -#ifdef _DEBUG -#define lseek _lseek -#define open _open -#define fstat _fstat -#define read _read -#define close _close -#define write _write -#define mkdir(dir,mode) _mkdir(dir) -#endif -#endif +extern uint32 parse_server_version(const char *server_version_str); +extern uint32 parse_program_version(const char *program_version); +extern bool parse_page(Page page, XLogRecPtr *lsn); +extern int32 do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, + CompressAlg alg, int level, const char **errormsg); +extern int32 do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, + CompressAlg alg, const char **errormsg); + +extern void pretty_size(int64 size, char *buf, 
size_t len); +extern void pretty_time_interval(double time, char *buf, size_t len); + +extern PGconn *pgdata_basic_setup(ConnectionOptions conn_opt, PGNodeInfo *nodeInfo); +extern void check_system_identifiers(PGconn *conn, char *pgdata); +extern void parse_filelist_filenames(parray *files, const char *root); + +/* in ptrack.c */ +extern void make_pagemap_from_ptrack_1(parray* files, PGconn* backup_conn); +extern void make_pagemap_from_ptrack_2(parray* files, PGconn* backup_conn, + const char *ptrack_schema, + int ptrack_version_num, + XLogRecPtr lsn); +extern void pg_ptrack_clear(PGconn *backup_conn, int ptrack_version_num); +extern void get_ptrack_version(PGconn *backup_conn, PGNodeInfo *nodeInfo); +extern bool pg_ptrack_enable(PGconn *backup_conn, int ptrack_version_num); +extern bool pg_ptrack_get_and_clear_db(Oid dbOid, Oid tblspcOid, PGconn *backup_conn); +extern char *pg_ptrack_get_and_clear(Oid tablespace_oid, + Oid db_oid, + Oid rel_oid, + size_t *result_size, + PGconn *backup_conn); +extern XLogRecPtr get_last_ptrack_lsn(PGconn *backup_conn, PGNodeInfo *nodeInfo); +extern parray * pg_ptrack_get_pagemapset(PGconn *backup_conn, const char *ptrack_schema, + int ptrack_version_num, XLogRecPtr lsn); + +/* open local file to writing */ +extern FILE* open_local_file_rw(const char *to_fullpath, char **out_buf, uint32 buf_size); + +extern int send_pages(ConnectionArgs* conn_arg, const char *to_fullpath, const char *from_fullpath, + pgFile *file, XLogRecPtr prev_backup_start_lsn, CompressAlg calg, int clevel, + uint32 checksum_version, bool use_pagemap, BackupPageHeader2 **headers, + BackupMode backup_mode, int ptrack_version_num, const char *ptrack_schema); + +/* FIO */ +extern void fio_delete(mode_t mode, const char *fullpath, fio_location location); +extern int fio_send_pages(const char *to_fullpath, const char *from_fullpath, pgFile *file, + XLogRecPtr horizonLsn, int calg, int clevel, uint32 checksum_version, + bool use_pagemap, BlockNumber *err_blknum, char **errormsg, + BackupPageHeader2 **headers); +/* return codes for fio_send_pages */ +extern int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, char **errormsg); +extern int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out, + pgFile *file, char **errormsg); + +extern void fio_list_dir(parray *files, const char *root, bool exclude, bool follow_symlink, + bool add_root, bool backup_logs, bool skip_hidden, int external_dir_num); + +extern bool pgut_rmtree(const char *path, bool rmtopdir, bool strict); + +extern PageState *fio_get_checksum_map(const char *fullpath, uint32 checksum_version, int n_blocks, + XLogRecPtr dest_stop_lsn, BlockNumber segmentno, fio_location location); + +extern datapagemap_t *fio_get_lsn_map(const char *fullpath, uint32 checksum_version, + int n_blocks, XLogRecPtr horizonLsn, BlockNumber segmentno, + fio_location location); +extern pid_t fio_check_postmaster(const char *pgdata, fio_location location); + +extern int32 fio_decompress(void* dst, void const* src, size_t size, int compress_alg); + +/* return codes for fio_send_pages() and fio_send_file() */ +#define SEND_OK (0) +#define FILE_MISSING (-1) +#define OPEN_FAILED (-2) +#define READ_FAILED (-3) +#define WRITE_FAILED (-4) +#define ZLIB_ERROR (-5) +#define REMOTE_ERROR (-6) +#define PAGE_CORRUPTION (-8) + +/* Check if specified location is local for current node */ +extern bool fio_is_remote(fio_location location); +extern bool fio_is_remote_simple(fio_location location); + +extern void 
get_header_errormsg(Page page, char **errormsg); +extern void get_checksum_errormsg(Page page, char **errormsg, + BlockNumber absolute_blkno); + +extern bool +datapagemap_is_set(datapagemap_t *map, BlockNumber blkno); + +extern void +datapagemap_print_debug(datapagemap_t *map); #endif /* PG_PROBACKUP_H */ diff --git a/src/ptrack.c b/src/ptrack.c new file mode 100644 index 000000000..3f2591137 --- /dev/null +++ b/src/ptrack.c @@ -0,0 +1,706 @@ +/*------------------------------------------------------------------------- + * + * ptrack.c: support functions for ptrack backups + * + * Copyright (c) 2019 Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#if PG_VERSION_NUM < 110000 +#include "catalog/catalog.h" +#endif +#include "catalog/pg_tablespace.h" + +/* + * Macro needed to parse ptrack. + * NOTE Keep those values synchronized with definitions in ptrack.h + */ +#define PTRACK_BITS_PER_HEAPBLOCK 1 +#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / PTRACK_BITS_PER_HEAPBLOCK) + +/* + * Given a list of files in the instance to backup, build a pagemap for each + * data file that has ptrack. Result is saved in the pagemap field of pgFile. + * NOTE we rely on the fact that provided parray is sorted by file->rel_path. + */ +void +make_pagemap_from_ptrack_1(parray *files, PGconn *backup_conn) +{ + size_t i; + Oid dbOid_with_ptrack_init = 0; + Oid tblspcOid_with_ptrack_init = 0; + char *ptrack_nonparsed = NULL; + size_t ptrack_nonparsed_size = 0; + + for (i = 0; i < parray_num(files); i++) + { + pgFile *file = (pgFile *) parray_get(files, i); + size_t start_addr; + + /* + * If there is a ptrack_init file in the database, + * we must backup all its files, ignoring ptrack files for relations. + */ + if (file->is_database) + { + /* + * The function pg_ptrack_get_and_clear_db returns true + * if there was a ptrack_init file. + * Also ignore ptrack files for global tablespace, + * to avoid any possible specific errors. + */ + if ((file->tblspcOid == GLOBALTABLESPACE_OID) || + pg_ptrack_get_and_clear_db(file->dbOid, file->tblspcOid, backup_conn)) + { + dbOid_with_ptrack_init = file->dbOid; + tblspcOid_with_ptrack_init = file->tblspcOid; + } + } + + if (file->is_datafile) + { + if (file->tblspcOid == tblspcOid_with_ptrack_init && + file->dbOid == dbOid_with_ptrack_init) + { + /* ignore ptrack if ptrack_init exists */ + elog(VERBOSE, "Ignoring ptrack because of ptrack_init for file: %s", file->rel_path); + file->pagemap_isabsent = true; + continue; + } + + /* get ptrack bitmap once for all segments of the file */ + if (file->segno == 0) + { + /* release previous value */ + pg_free(ptrack_nonparsed); + ptrack_nonparsed_size = 0; + + ptrack_nonparsed = pg_ptrack_get_and_clear(file->tblspcOid, file->dbOid, + file->relOid, &ptrack_nonparsed_size, backup_conn); + } + + if (ptrack_nonparsed != NULL) + { + /* + * pg_ptrack_get_and_clear() returns ptrack with VARHDR cut out. + * Compute the beginning of the ptrack map related to this segment + * + * HEAPBLOCKS_PER_BYTE. Number of heap pages one ptrack byte can track: 8 + * RELSEG_SIZE. Number of Pages per segment: 131072 + * RELSEG_SIZE/HEAPBLOCKS_PER_BYTE. number of bytes in ptrack file needed + * to keep track on one relsegment: 16384 + */ + start_addr = (RELSEG_SIZE/HEAPBLOCKS_PER_BYTE)*file->segno; + + /* + * If file segment was created after we have read ptrack, + * we won't have a bitmap for this segment. 
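The constants spelled out in the comment above reduce to a simple slice computation per 1 GB segment. A standalone sketch of the same arithmetic, assuming a default build (8 kB pages, RELSEG_SIZE of 131072 pages) and a made-up map size:

```c
/* Per-segment slice of a ptrack 1.x map, mirroring the start_addr logic above. */
#include <stdio.h>
#include <stddef.h>

#define BITS_PER_BYTE             8
#define PTRACK_BITS_PER_HEAPBLOCK 1
#define HEAPBLOCKS_PER_BYTE       (BITS_PER_BYTE / PTRACK_BITS_PER_HEAPBLOCK) /* 8 */
#define RELSEG_SIZE               131072   /* pages per 1 GB segment (default build) */

int
main(void)
{
    size_t map_size = 40000;   /* hypothetical size of a relation's ptrack map, in bytes */
    int    segno;

    for (segno = 0; segno < 4; segno++)
    {
        size_t start_addr = (RELSEG_SIZE / HEAPBLOCKS_PER_BYTE) * (size_t) segno; /* 16384 * segno */

        if (start_addr > map_size)
            printf("segno %d: no bitmap for this segment\n", segno);
        else if (start_addr + RELSEG_SIZE / HEAPBLOCKS_PER_BYTE > map_size)
            printf("segno %d: partial bitmap, %zu bytes\n", segno, map_size - start_addr);
        else
            printf("segno %d: full bitmap, %d bytes\n", segno, RELSEG_SIZE / HEAPBLOCKS_PER_BYTE);
    }
    return 0;
}
```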
+ */ + if (start_addr > ptrack_nonparsed_size) + { + elog(VERBOSE, "Ptrack is missing for file: %s", file->rel_path); + file->pagemap_isabsent = true; + } + else + { + + if (start_addr + RELSEG_SIZE/HEAPBLOCKS_PER_BYTE > ptrack_nonparsed_size) + { + file->pagemap.bitmapsize = ptrack_nonparsed_size - start_addr; + elog(VERBOSE, "pagemap size: %i", file->pagemap.bitmapsize); + } + else + { + file->pagemap.bitmapsize = RELSEG_SIZE/HEAPBLOCKS_PER_BYTE; + elog(VERBOSE, "pagemap size: %i", file->pagemap.bitmapsize); + } + + file->pagemap.bitmap = pg_malloc(file->pagemap.bitmapsize); + memcpy(file->pagemap.bitmap, ptrack_nonparsed+start_addr, file->pagemap.bitmapsize); + } + } + else + { + /* + * If ptrack file is missing, try to copy the entire file. + * It can happen in two cases: + * - files were created by commands that bypass buffer manager + * and, correspondingly, ptrack mechanism. + * i.e. CREATE DATABASE + * - target relation was deleted. + */ + elog(VERBOSE, "Ptrack is missing for file: %s", file->rel_path); + file->pagemap_isabsent = true; + } + } + } +} + +/* Check if the instance supports compatible version of ptrack, + * fill-in version number if it does. + * Also for ptrack 2.x save schema namespace. + */ +void +get_ptrack_version(PGconn *backup_conn, PGNodeInfo *nodeInfo) +{ + PGresult *res_db; + char *ptrack_version_str; + + res_db = pgut_execute(backup_conn, + "SELECT extnamespace::regnamespace, extversion " + "FROM pg_catalog.pg_extension WHERE extname = 'ptrack'", + 0, NULL); + + if (PQntuples(res_db) > 0) + { + /* ptrack 2.x is supported, save schema name and version */ + nodeInfo->ptrack_schema = pgut_strdup(PQgetvalue(res_db, 0, 0)); + + if (nodeInfo->ptrack_schema == NULL) + elog(ERROR, "Failed to obtain schema name of ptrack extension"); + + ptrack_version_str = PQgetvalue(res_db, 0, 1); + } + else + { + /* ptrack 1.x is supported, save version */ + PQclear(res_db); + res_db = pgut_execute(backup_conn, + "SELECT proname FROM pg_proc WHERE proname='ptrack_version'", + 0, NULL); + + if (PQntuples(res_db) == 0) + { + /* ptrack is not supported */ + PQclear(res_db); + return; + } + + res_db = pgut_execute(backup_conn, + "SELECT pg_catalog.ptrack_version()", + 0, NULL); + if (PQntuples(res_db) == 0) + { + /* TODO: Something went wrong, should we error out here? */ + PQclear(res_db); + return; + } + ptrack_version_str = PQgetvalue(res_db, 0, 0); + } + + if (strcmp(ptrack_version_str, "1.5") == 0) + nodeInfo->ptrack_version_num = 15; + else if (strcmp(ptrack_version_str, "1.6") == 0) + nodeInfo->ptrack_version_num = 16; + else if (strcmp(ptrack_version_str, "1.7") == 0) + nodeInfo->ptrack_version_num = 17; + else if (strcmp(ptrack_version_str, "2.0") == 0) + nodeInfo->ptrack_version_num = 20; + else if (strcmp(ptrack_version_str, "2.1") == 0) + nodeInfo->ptrack_version_num = 21; + else + elog(WARNING, "Update your ptrack to the version 1.5 or upper. 
Current version is %s", + ptrack_version_str); + + PQclear(res_db); +} + +/* + * Check if ptrack is enabled in target instance + */ +bool +pg_ptrack_enable(PGconn *backup_conn, int ptrack_version_num) +{ + PGresult *res_db; + bool result = false; + + if (ptrack_version_num < 20) + { + res_db = pgut_execute(backup_conn, "SHOW ptrack_enable", 0, NULL); + result = strcmp(PQgetvalue(res_db, 0, 0), "on") == 0; + } + else if (ptrack_version_num == 20) + { + res_db = pgut_execute(backup_conn, "SHOW ptrack_map_size", 0, NULL); + result = strcmp(PQgetvalue(res_db, 0, 0), "0") != 0; + } + else + { + res_db = pgut_execute(backup_conn, "SHOW ptrack.map_size", 0, NULL); + result = strcmp(PQgetvalue(res_db, 0, 0), "0") != 0 && + strcmp(PQgetvalue(res_db, 0, 0), "-1") != 0; + } + + PQclear(res_db); + return result; +} + + +/* ---------------------------- + * Ptrack 1.* support functions + * ---------------------------- + */ + +/* Clear ptrack files in all databases of the instance we connected to */ +void +pg_ptrack_clear(PGconn *backup_conn, int ptrack_version_num) +{ + PGresult *res_db, + *res; + const char *dbname; + int i; + Oid dbOid, tblspcOid; + char *params[2]; + + // FIXME Perform this check on caller's side + if (ptrack_version_num >= 20) + return; + + params[0] = palloc(64); + params[1] = palloc(64); + res_db = pgut_execute(backup_conn, "SELECT datname, oid, dattablespace FROM pg_database", + 0, NULL); + + for(i = 0; i < PQntuples(res_db); i++) + { + PGconn *tmp_conn; + + dbname = PQgetvalue(res_db, i, 0); + if (strcmp(dbname, "template0") == 0) + continue; + + dbOid = atoi(PQgetvalue(res_db, i, 1)); + tblspcOid = atoi(PQgetvalue(res_db, i, 2)); + + tmp_conn = pgut_connect(instance_config.conn_opt.pghost, instance_config.conn_opt.pgport, + dbname, + instance_config.conn_opt.pguser); + + res = pgut_execute(tmp_conn, "SELECT pg_catalog.pg_ptrack_clear()", + 0, NULL); + PQclear(res); + + sprintf(params[0], "%i", dbOid); + sprintf(params[1], "%i", tblspcOid); + res = pgut_execute(tmp_conn, "SELECT pg_catalog.pg_ptrack_get_and_clear_db($1, $2)", + 2, (const char **)params); + PQclear(res); + + pgut_disconnect(tmp_conn); + } + + pfree(params[0]); + pfree(params[1]); + PQclear(res_db); +} + +bool +pg_ptrack_get_and_clear_db(Oid dbOid, Oid tblspcOid, PGconn *backup_conn) +{ + char *params[2]; + char *dbname; + PGresult *res_db; + PGresult *res; + bool result; + + params[0] = palloc(64); + params[1] = palloc(64); + + sprintf(params[0], "%i", dbOid); + res_db = pgut_execute(backup_conn, + "SELECT datname FROM pg_database WHERE oid=$1", + 1, (const char **) params); + /* + * If database is not found, it's not an error. + * It could have been deleted since previous backup. 
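pg_ptrack_enable() above queries a different GUC and accepts different values depending on the ptrack version. The same decision table as a pure function; an illustration only, not part of the project's API:

```c
/* Which SHOW result counts as "ptrack enabled", per ptrack version. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool
ptrack_enabled(int ptrack_version_num, const char *show_value)
{
    if (ptrack_version_num < 20)        /* ptrack 1.x: SHOW ptrack_enable */
        return strcmp(show_value, "on") == 0;
    else if (ptrack_version_num == 20)  /* ptrack 2.0: SHOW ptrack_map_size */
        return strcmp(show_value, "0") != 0;
    else                                /* ptrack >= 2.1: SHOW ptrack.map_size */
        return strcmp(show_value, "0") != 0 &&
               strcmp(show_value, "-1") != 0;
}

int
main(void)
{
    printf("%d %d %d\n",
           ptrack_enabled(15, "on"),   /* 1 */
           ptrack_enabled(20, "0"),    /* 0 */
           ptrack_enabled(21, "-1"));  /* 0 */
    return 0;
}
```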
+ */ + if (PQntuples(res_db) != 1 || PQnfields(res_db) != 1) + return false; + + dbname = PQgetvalue(res_db, 0, 0); + + /* Always backup all files from template0 database */ + if (strcmp(dbname, "template0") == 0) + { + PQclear(res_db); + return true; + } + PQclear(res_db); + + sprintf(params[0], "%i", dbOid); + sprintf(params[1], "%i", tblspcOid); + res = pgut_execute(backup_conn, "SELECT pg_catalog.pg_ptrack_get_and_clear_db($1, $2)", + 2, (const char **)params); + + if (PQnfields(res) != 1) + elog(ERROR, "cannot perform pg_ptrack_get_and_clear_db()"); + + if (!parse_bool(PQgetvalue(res, 0, 0), &result)) + elog(ERROR, + "result of pg_ptrack_get_and_clear_db() is invalid: %s", + PQgetvalue(res, 0, 0)); + + PQclear(res); + pfree(params[0]); + pfree(params[1]); + + return result; +} + +/* Read and clear ptrack files of the target relation. + * Result is a bytea ptrack map of all segments of the target relation. + * case 1: we know a tablespace_oid, db_oid, and rel_filenode + * case 2: we know db_oid and rel_filenode (no tablespace_oid, because file in pg_default) + * case 3: we know only rel_filenode (because file in pg_global) + */ +char * +pg_ptrack_get_and_clear(Oid tablespace_oid, Oid db_oid, Oid rel_filenode, + size_t *result_size, PGconn *backup_conn) +{ + PGconn *tmp_conn; + PGresult *res_db, + *res; + char *params[2]; + char *result; + char *val; + + params[0] = palloc(64); + params[1] = palloc(64); + + /* regular file (not in directory 'global') */ + if (db_oid != 0) + { + char *dbname; + + sprintf(params[0], "%i", db_oid); + res_db = pgut_execute(backup_conn, + "SELECT datname FROM pg_database WHERE oid=$1", + 1, (const char **) params); + /* + * If database is not found, it's not an error. + * It could have been deleted since previous backup. + */ + if (PQntuples(res_db) != 1 || PQnfields(res_db) != 1) + return NULL; + + dbname = PQgetvalue(res_db, 0, 0); + + if (strcmp(dbname, "template0") == 0) + { + PQclear(res_db); + return NULL; + } + + tmp_conn = pgut_connect(instance_config.conn_opt.pghost, instance_config.conn_opt.pgport, + dbname, + instance_config.conn_opt.pguser); + sprintf(params[0], "%i", tablespace_oid); + sprintf(params[1], "%i", rel_filenode); + res = pgut_execute(tmp_conn, "SELECT pg_catalog.pg_ptrack_get_and_clear($1, $2)", + 2, (const char **)params); + + if (PQnfields(res) != 1) + elog(ERROR, "cannot get ptrack file from database \"%s\" by tablespace oid %u and relation oid %u", + dbname, tablespace_oid, rel_filenode); + PQclear(res_db); + pgut_disconnect(tmp_conn); + } + /* file in directory 'global' */ + else + { + /* + * execute ptrack_get_and_clear for relation in pg_global + * Use backup_conn, cause we can do it from any database. + */ + sprintf(params[0], "%i", tablespace_oid); + sprintf(params[1], "%i", rel_filenode); + res = pgut_execute(backup_conn, "SELECT pg_catalog.pg_ptrack_get_and_clear($1, $2)", + 2, (const char **)params); + + if (PQnfields(res) != 1) + elog(ERROR, "cannot get ptrack file from pg_global tablespace and relation oid %u", + rel_filenode); + } + + val = PQgetvalue(res, 0, 0); + + /* TODO Now pg_ptrack_get_and_clear() returns bytea ending with \x. + * It should be fixed in future ptrack releases, but till then we + * can parse it. 
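The ptrack SQL functions return their maps as bytea text, which the code above unescapes with PQunescapeBytea() after checking for the empty "\x" literal. A minimal, connection-free sketch of that decoding (compile with -lpq); the sample value is made up:

```c
/* Decode a bytea value as returned in text form by libpq. */
#include <stdio.h>
#include <string.h>
#include <libpq-fe.h>

static unsigned char *
decode_ptrack_map(const char *val, size_t *size)
{
    /* "\x" with nothing after it means the ptrack file is missing/empty */
    if (val[0] == '\\' && strcmp(val + 1, "x") == 0)
        return NULL;

    return PQunescapeBytea((const unsigned char *) val, size);
}

int
main(void)
{
    size_t         size = 0;
    unsigned char *map = decode_ptrack_map("\\x0180ff", &size);

    printf("decoded %zu bytes\n", size);   /* 3 */
    PQfreemem(map);

    if (decode_ptrack_map("\\x", &size) == NULL)
        printf("empty map detected\n");
    return 0;
}
```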
+ */ + if (strcmp("x", val+1) == 0) + { + /* Ptrack file is missing */ + return NULL; + } + + result = (char *) PQunescapeBytea((unsigned char *) PQgetvalue(res, 0, 0), + result_size); + PQclear(res); + pfree(params[0]); + pfree(params[1]); + + return result; +} + +/* + * Get lsn of the moment when ptrack was enabled the last time. + */ +XLogRecPtr +get_last_ptrack_lsn(PGconn *backup_conn, PGNodeInfo *nodeInfo) + +{ + PGresult *res; + uint32 lsn_hi; + uint32 lsn_lo; + XLogRecPtr lsn; + + if (nodeInfo->ptrack_version_num < 20) + res = pgut_execute(backup_conn, "SELECT pg_catalog.pg_ptrack_control_lsn()", + 0, NULL); + else + { + char query[128]; + + if (nodeInfo->ptrack_version_num == 20) + sprintf(query, "SELECT %s.pg_ptrack_control_lsn()", nodeInfo->ptrack_schema); + else + sprintf(query, "SELECT %s.ptrack_init_lsn()", nodeInfo->ptrack_schema); + + res = pgut_execute(backup_conn, query, 0, NULL); + } + + /* Extract timeline and LSN from results of pg_start_backup() */ + XLogDataFromLSN(PQgetvalue(res, 0, 0), &lsn_hi, &lsn_lo); + /* Calculate LSN */ + lsn = ((uint64) lsn_hi) << 32 | lsn_lo; + + PQclear(res); + return lsn; +} + +char * +pg_ptrack_get_block(ConnectionArgs *arguments, + Oid dbOid, + Oid tblsOid, + Oid relOid, + BlockNumber blknum, + size_t *result_size, + int ptrack_version_num, + const char *ptrack_schema) +{ + PGresult *res; + char *params[4]; + char *result; + + params[0] = palloc(64); + params[1] = palloc(64); + params[2] = palloc(64); + params[3] = palloc(64); + + /* + * Use tmp_conn, since we may work in parallel threads. + * We can connect to any database. + */ + sprintf(params[0], "%i", tblsOid); + sprintf(params[1], "%i", dbOid); + sprintf(params[2], "%i", relOid); + sprintf(params[3], "%u", blknum); + + if (arguments->conn == NULL) + { + arguments->conn = pgut_connect(instance_config.conn_opt.pghost, + instance_config.conn_opt.pgport, + instance_config.conn_opt.pgdatabase, + instance_config.conn_opt.pguser); + } + + if (arguments->cancel_conn == NULL) + arguments->cancel_conn = PQgetCancel(arguments->conn); + + // elog(LOG, "db %i pg_ptrack_get_block(%i, %i, %u)",dbOid, tblsOid, relOid, blknum); + + if (ptrack_version_num < 20) + res = pgut_execute_parallel(arguments->conn, + arguments->cancel_conn, + "SELECT pg_catalog.pg_ptrack_get_block_2($1, $2, $3, $4)", + 4, (const char **)params, true, false, false); + else + { + char query[128]; + + /* sanity */ + if (!ptrack_schema) + elog(ERROR, "Schema name of ptrack extension is missing"); + + if (ptrack_version_num == 20) + sprintf(query, "SELECT %s.pg_ptrack_get_block($1, $2, $3, $4)", ptrack_schema); + else + elog(ERROR, "ptrack >= 2.1.0 does not support pg_ptrack_get_block()"); + // sprintf(query, "SELECT %s.ptrack_get_block($1, $2, $3, $4)", ptrack_schema); + + res = pgut_execute_parallel(arguments->conn, + arguments->cancel_conn, + query, 4, (const char **)params, + true, false, false); + } + + if (PQnfields(res) != 1) + { + elog(VERBOSE, "cannot get file block for relation oid %u", + relOid); + return NULL; + } + + if (PQgetisnull(res, 0, 0)) + { + elog(VERBOSE, "cannot get file block for relation oid %u", + relOid); + return NULL; + } + + result = (char *) PQunescapeBytea((unsigned char *) PQgetvalue(res, 0, 0), + result_size); + + PQclear(res); + + pfree(params[0]); + pfree(params[1]); + pfree(params[2]); + pfree(params[3]); + + return result; +} + +/* ---------------------------- + * Ptrack 2.* support functions + * ---------------------------- + */ + +/* + * Fetch a list of changed files with their ptrack 
maps. + */ +parray * +pg_ptrack_get_pagemapset(PGconn *backup_conn, const char *ptrack_schema, + int ptrack_version_num, XLogRecPtr lsn) +{ + PGresult *res; + char lsn_buf[17 + 1]; + char *params[1]; + parray *pagemapset = NULL; + int i; + char query[512]; + + snprintf(lsn_buf, sizeof lsn_buf, "%X/%X", (uint32) (lsn >> 32), (uint32) lsn); + params[0] = pstrdup(lsn_buf); + + if (!ptrack_schema) + elog(ERROR, "Schema name of ptrack extension is missing"); + + if (ptrack_version_num == 20) + sprintf(query, "SELECT path, pagemap FROM %s.pg_ptrack_get_pagemapset($1) ORDER BY 1", + ptrack_schema); + else + sprintf(query, "SELECT path, pagemap FROM %s.ptrack_get_pagemapset($1) ORDER BY 1", + ptrack_schema); + + res = pgut_execute(backup_conn, query, 1, (const char **) params); + pfree(params[0]); + + if (PQnfields(res) != 2) + elog(ERROR, "cannot get ptrack pagemapset"); + + /* sanity ? */ + + /* Construct database map */ + for (i = 0; i < PQntuples(res); i++) + { + page_map_entry *pm_entry = (page_map_entry *) pgut_malloc(sizeof(page_map_entry)); + + /* get path */ + pm_entry->path = pgut_strdup(PQgetvalue(res, i, 0)); + + /* get bytea */ + pm_entry->pagemap = (char *) PQunescapeBytea((unsigned char *) PQgetvalue(res, i, 1), + &pm_entry->pagemapsize); + + if (pagemapset == NULL) + pagemapset = parray_new(); + + parray_append(pagemapset, pm_entry); + } + + PQclear(res); + + return pagemapset; +} + +/* + * Given a list of files in the instance to backup, build a pagemap for each + * data file that has ptrack. Result is saved in the pagemap field of pgFile. + * + * We fetch a list of changed files with their ptrack maps. After that files + * are merged with their bitmaps. File without bitmap is treated as unchanged. + */ +void +make_pagemap_from_ptrack_2(parray *files, + PGconn *backup_conn, + const char *ptrack_schema, + int ptrack_version_num, + XLogRecPtr lsn) +{ + parray *filemaps; + int file_i = 0; + page_map_entry *dummy_map = NULL; + + /* Receive all available ptrack bitmaps at once */ + filemaps = pg_ptrack_get_pagemapset(backup_conn, ptrack_schema, + ptrack_version_num, lsn); + + if (filemaps != NULL) + parray_qsort(filemaps, pgFileMapComparePath); + else + return; + + dummy_map = (page_map_entry *) pgut_malloc(sizeof(page_map_entry)); + + /* Iterate over files and look for corresponding pagemap if any */ + for (file_i = 0; file_i < parray_num(files); file_i++) + { + pgFile *file = (pgFile *) parray_get(files, file_i); + page_map_entry **res_map = NULL; + page_map_entry *map = NULL; + + /* + * For now nondata files are not entitled to have pagemap + * TODO It's possible to use ptrack for incremental backup of + * relation forks. Not implemented yet. + */ + if (!file->is_datafile || file->is_cfs) + continue; + + /* Consider only files from PGDATA (this check is probably redundant) */ + if (file->external_dir_num != 0) + continue; + + if (filemaps) + { + dummy_map->path = file->rel_path; + res_map = parray_bsearch(filemaps, dummy_map, pgFileMapComparePath); + map = (res_map) ? *res_map : NULL; + } + + /* Found map */ + if (map) + { + elog(VERBOSE, "Using ptrack pagemap for file \"%s\"", file->rel_path); + file->pagemap.bitmapsize = map->pagemapsize; + file->pagemap.bitmap = map->pagemap; + } + } + + free(dummy_map); +} diff --git a/src/restore.c b/src/restore.c index 3fa8f09fb..2ade54fa8 100644 --- a/src/restore.c +++ b/src/restore.c @@ -3,27 +3,34 @@ * restore.c: restore DB cluster and archived WAL. 
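Several places in this hunk move LSNs between their 64-bit form and the textual %X/%X form: get_last_ptrack_lsn() reassembles an LSN from two 32-bit halves, and pg_ptrack_get_pagemapset() above formats one as a query parameter. A small round-trip sketch (the LSN value is made up):

```c
/* Format an LSN as "hi/lo" and rebuild it from the two 32-bit halves. */
#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
    uint64_t     lsn = UINT64_C(0x16B374D848);   /* hypothetical LSN 16/B374D848 */
    char         buf[17 + 1];
    unsigned int hi, lo;

    /* high and low 32 bits, separated by '/' */
    snprintf(buf, sizeof buf, "%X/%X",
             (unsigned int) (lsn >> 32), (unsigned int) lsn);

    /* parse it back and recombine, mirroring the lsn_hi/lsn_lo computation above */
    sscanf(buf, "%X/%X", &hi, &lo);
    printf("%s -> %" PRIX64 "\n", buf, ((uint64_t) hi << 32) | lo);
    return 0;
}
```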
* * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ #include "pg_probackup.h" -#include +#include "access/timeline.h" + #include -#include #include -#include -#include "catalog/pg_control.h" -#include "utils/logger.h" #include "utils/thread.h" typedef struct { - parray *files; - pgBackup *backup; + parray *pgdata_files; + parray *dest_files; + pgBackup *dest_backup; + parray *dest_external_dirs; + parray *parent_chain; + parray *dbOid_exclude_list; + bool skip_external_dirs; + const char *to_root; + size_t restored_bytes; + bool use_bitmap; + IncrRestoreMode incremental_mode; + XLogRecPtr shift_lsn; /* used only in LSN incremental_mode */ /* * Return value from the thread. @@ -32,40 +39,110 @@ typedef struct int ret; } restore_files_arg; -static void restore_backup(pgBackup *backup); static void create_recovery_conf(time_t backup_id, pgRecoveryTarget *rt, - pgBackup *backup); + pgBackup *backup, + pgRestoreParams *params); static void *restore_files(void *arg); -static void remove_deleted_files(pgBackup *backup); +static void set_orphan_status(parray *backups, pgBackup *parent_backup); +static void pg12_recovery_config(pgBackup *backup, bool add_include); +static void restore_chain(pgBackup *dest_backup, parray *parent_chain, + parray *dbOid_exclude_list, pgRestoreParams *params, + const char *pgdata_path, bool no_sync); +static void check_incremental_compatibility(const char *pgdata, uint64 system_identifier, + IncrRestoreMode incremental_mode); + +/* + * Iterate over backup list to find all ancestors of the broken parent_backup + * and update their status to BACKUP_STATUS_ORPHAN + */ +static void +set_orphan_status(parray *backups, pgBackup *parent_backup) +{ + /* chain is intact, but at least one parent is invalid */ + char *parent_backup_id; + int j; + + /* parent_backup_id is a human-readable backup ID */ + parent_backup_id = base36enc_dup(parent_backup->start_time); + + for (j = 0; j < parray_num(backups); j++) + { + + pgBackup *backup = (pgBackup *) parray_get(backups, j); + + if (is_parent(parent_backup->start_time, backup, false)) + { + if (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE) + { + write_backup_status(backup, BACKUP_STATUS_ORPHAN, instance_name, true); + + elog(WARNING, + "Backup %s is orphaned because his parent %s has status: %s", + base36enc(backup->start_time), + parent_backup_id, + status2str(parent_backup->status)); + } + else + { + elog(WARNING, "Backup %s has parent %s with status: %s", + base36enc(backup->start_time), parent_backup_id, + status2str(parent_backup->status)); + } + } + } + pg_free(parent_backup_id); +} /* * Entry point of pg_probackup RESTORE and VALIDATE subcommands. */ int do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, - bool is_restore) + pgRestoreParams *params, bool no_sync) { int i = 0; - parray *backups; + int j = 0; + parray *backups = NULL; + pgBackup *tmp_backup = NULL; pgBackup *current_backup = NULL; pgBackup *dest_backup = NULL; pgBackup *base_full_backup = NULL; pgBackup *corrupted_backup = NULL; - int dest_backup_index = 0; - int base_full_backup_index = 0; - int corrupted_backup_index = 0; - char *action = is_restore ? "Restore":"Validate"; - - if (is_restore) + char *action = params->is_restore ? 
"Restore":"Validate"; + parray *parent_chain = NULL; + parray *dbOid_exclude_list = NULL; + bool pgdata_is_empty = true; + bool tblspaces_are_empty = true; + XLogRecPtr shift_lsn = InvalidXLogRecPtr; + + if (params->is_restore) { - if (pgdata == NULL) + if (instance_config.pgdata == NULL) elog(ERROR, "required parameter not specified: PGDATA (-D, --pgdata)"); /* Check if restore destination empty */ - if (!dir_is_empty(pgdata)) - elog(ERROR, "restore destination is not empty: \"%s\"", pgdata); + if (!dir_is_empty(instance_config.pgdata, FIO_DB_HOST)) + { + /* Check that remote system is NOT running and systemd id is the same as ours */ + if (params->incremental_mode != INCR_NONE) + { + elog(INFO, "Running incremental restore into nonempty directory: \"%s\"", + instance_config.pgdata); + + check_incremental_compatibility(instance_config.pgdata, + instance_config.system_identifier, + params->incremental_mode); + } + else + elog(ERROR, "Restore destination is not empty: \"%s\"", + instance_config.pgdata); + + /* if destination directory is empty, then incremental restore may be disabled */ + pgdata_is_empty = false; + } } if (instance_name == NULL) @@ -73,10 +150,8 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, elog(LOG, "%s begin.", action); - /* Get exclusive lock of backup catalog */ - catalog_lock(); /* Get list of all backups sorted in order of descending start time */ - backups = catalog_get_backup_list(INVALID_BACKUP_ID); + backups = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); /* Find backup range we should restore or validate. */ while ((i < parray_num(backups)) && !dest_backup) @@ -91,11 +166,18 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, /* * [PGPRO-1164] If BACKUP_ID is not provided for restore command, * we must find the first valid(!) backup. + + * If target_backup_id is not provided, we can be sure that + * PITR for restore or validate is requested. + * So we can assume that user is more interested in recovery to specific point + * in time and NOT interested in revalidation of invalid backups. + * So based on that assumptions we should choose only OK and DONE backups + * as candidates for validate and restore. */ - if (is_restore && - target_backup_id == INVALID_BACKUP_ID && - current_backup->status != BACKUP_STATUS_OK) + if (target_backup_id == INVALID_BACKUP_ID && + (current_backup->status != BACKUP_STATUS_OK && + current_backup->status != BACKUP_STATUS_DONE)) { elog(WARNING, "Skipping backup %s, because it has non-valid status: %s", base36enc(current_backup->start_time), status2str(current_backup->status)); @@ -111,14 +193,22 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, { /* backup is not ok, - * but in case of CORRUPT, ORPHAN or DONE revalidation can be done, + * but in case of CORRUPT or ORPHAN revalidation is possible + * unless --no-validate is used, * in other cases throw an error. */ - if (current_backup->status != BACKUP_STATUS_OK) + // 1. validate + // 2. validate -i INVALID_ID <- allowed revalidate + // 3. restore -i INVALID_ID <- allowed revalidate and restore + // 4. restore <- impossible + // 5. 
restore --no-validate <- forbidden + if (current_backup->status != BACKUP_STATUS_OK && + current_backup->status != BACKUP_STATUS_DONE) { - if (current_backup->status == BACKUP_STATUS_DONE || - current_backup->status == BACKUP_STATUS_ORPHAN || - current_backup->status == BACKUP_STATUS_CORRUPT) + if ((current_backup->status == BACKUP_STATUS_ORPHAN || + current_backup->status == BACKUP_STATUS_CORRUPT || + current_backup->status == BACKUP_STATUS_RUNNING) + && (!params->no_validate || params->force)) elog(WARNING, "Backup %s has status: %s", base36enc(current_backup->start_time), status2str(current_backup->status)); else @@ -126,13 +216,13 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, base36enc(current_backup->start_time), status2str(current_backup->status)); } - if (rt->recovery_target_tli) + if (rt->target_tli) { parray *timelines; - elog(LOG, "target timeline ID = %u", rt->recovery_target_tli); + // elog(LOG, "target timeline ID = %u", rt->target_tli); /* Read timeline history files from archives */ - timelines = readTimeLineHistory_probackup(rt->recovery_target_tli); + timelines = read_timeline_history(arclog_path, rt->target_tli, true); if (!satisfy_timeline(timelines, current_backup)) { @@ -143,12 +233,15 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, /* Try to find another backup that satisfies target timeline */ continue; } + + parray_walk(timelines, pfree); + parray_free(timelines); } if (!satisfy_recovery_target(current_backup, rt)) { if (target_backup_id != INVALID_BACKUP_ID) - elog(ERROR, "target backup %s does not satisfy restore options", + elog(ERROR, "Requested backup %s does not satisfy restore options", base36enc(target_backup_id)); else /* Try to find another backup that satisfies target options */ @@ -160,43 +253,82 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, * Save it as dest_backup */ dest_backup = current_backup; - dest_backup_index = i-1; } } + /* TODO: Show latest possible target */ if (dest_backup == NULL) - elog(ERROR, "Backup satisfying target options is not found."); + { + /* Failed to find target backup */ + if (target_backup_id) + elog(ERROR, "Requested backup %s is not found.", base36enc(target_backup_id)); + else + elog(ERROR, "Backup satisfying target options is not found."); + /* TODO: check if user asked PITR or just restore of latest backup */ + } /* If we already found dest_backup, look for full backup. */ - if (dest_backup) + if (dest_backup->backup_mode == BACKUP_MODE_FULL) + base_full_backup = dest_backup; + else { - base_full_backup = current_backup; + int result; - if (current_backup->backup_mode != BACKUP_MODE_FULL) + result = scan_parent_chain(dest_backup, &tmp_backup); + + if (result == ChainIsBroken) { - base_full_backup = find_parent_backup(current_backup); + /* chain is broken, determine missing backup ID + * and orphinize all his descendants + */ + char *missing_backup_id; + time_t missing_backup_start_time; - if (base_full_backup == NULL) - elog(ERROR, "Valid full backup for backup %s is not found.", - base36enc(current_backup->start_time)); - } + missing_backup_start_time = tmp_backup->parent_backup; + missing_backup_id = base36enc_dup(tmp_backup->parent_backup); - /* - * We have found full backup by link, - * now we need to walk the list to find its index. - * - * TODO I think we should rewrite it someday to use double linked list - * and avoid relying on sort order anymore. 
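The removed TODO above is effectively resolved by the new code later in this hunk, which follows parent_backup_link pointers instead of relying on the sort order of the backup list. A minimal sketch of that chain walk, with a hypothetical stripped-down struct and made-up backup IDs:

```c
/* Walk from the destination backup to its FULL ancestor via parent links. */
#include <stdio.h>

typedef struct Backup
{
    const char    *id;
    int            is_full;
    struct Backup *parent_backup_link;   /* NULL for a FULL backup */
} Backup;

int
main(void)
{
    Backup full  = { "Q5SPC1", 1, NULL };
    Backup page  = { "Q5SPE2", 0, &full };
    Backup delta = { "Q5SPF3", 0, &page };

    /* collect the chain from the destination up to (and including) the FULL backup */
    for (Backup *b = &delta; b != NULL; b = b->parent_backup_link)
        printf("chain: %s%s\n", b->id, b->is_full ? " (full)" : "");

    return 0;
}
```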
- */ - for (i = dest_backup_index; i < parray_num(backups); i++) - { - pgBackup * temp_backup = (pgBackup *) parray_get(backups, i); - if (temp_backup->start_time == base_full_backup->start_time) + for (j = 0; j < parray_num(backups); j++) { - base_full_backup_index = i; - break; + pgBackup *backup = (pgBackup *) parray_get(backups, j); + + /* use parent backup start_time because he is missing + * and we must orphinize his descendants + */ + if (is_parent(missing_backup_start_time, backup, false)) + { + if (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE) + { + write_backup_status(backup, BACKUP_STATUS_ORPHAN, instance_name, true); + + elog(WARNING, "Backup %s is orphaned because his parent %s is missing", + base36enc(backup->start_time), missing_backup_id); + } + else + { + elog(WARNING, "Backup %s has missing parent %s", + base36enc(backup->start_time), missing_backup_id); + } + } } + pg_free(missing_backup_id); + /* No point in doing futher */ + elog(ERROR, "%s of backup %s failed.", action, base36enc(dest_backup->start_time)); + } + else if (result == ChainIsInvalid) + { + /* chain is intact, but at least one parent is invalid */ + set_orphan_status(backups, tmp_backup); + tmp_backup = find_parent_full_backup(dest_backup); + + /* sanity */ + if (!tmp_backup) + elog(ERROR, "Parent full backup for the given backup %s was not found", + base36enc(dest_backup->start_time)); } + + /* We have found full backup */ + base_full_backup = tmp_backup; } if (base_full_backup == NULL) @@ -206,10 +338,153 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, * Ensure that directories provided in tablespace mapping are valid * i.e. empty or not exist. */ - if (is_restore) - check_tablespace_mapping(dest_backup); + if (params->is_restore) + { + check_tablespace_mapping(dest_backup, params->incremental_mode != INCR_NONE, &tblspaces_are_empty); + + if (params->incremental_mode != INCR_NONE && pgdata_is_empty && tblspaces_are_empty) + { + elog(INFO, "Destination directory and tablespace directories are empty, " + "disable incremental restore"); + params->incremental_mode = INCR_NONE; + } + + /* no point in checking external directories if their restore is not requested */ + if (!params->skip_external_dirs) + check_external_dir_mapping(dest_backup, params->incremental_mode != INCR_NONE); + } - if (!is_restore || !rt->restore_no_validate) + /* At this point we are sure that parent chain is whole + * so we can build separate array, containing all needed backups, + * to simplify validation and restore + */ + parent_chain = parray_new(); + + /* Take every backup that is a child of base_backup AND parent of dest_backup + * including base_backup and dest_backup + */ + + tmp_backup = dest_backup; + while (tmp_backup) + { + parray_append(parent_chain, tmp_backup); + tmp_backup = tmp_backup->parent_backup_link; + } + + /* + * Determine the shift-LSN + * Consider the example A: + * + * + * /----D----------F-> + * -A--B---C---*-------X-----> + * + * [A,F] - incremental chain + * X - the state of pgdata + * F - destination backup + * * - switch point + * + * When running incremental restore in 'lsn' mode, we get a bitmap of pages, + * whose LSN is less than shift-LSN (backup C stop_lsn). + * So when restoring file, we can skip restore of pages coming from + * A, B and C. + * Pages from D and F cannot be skipped due to incremental restore. 
+ * + * Consider the example B: + * + * + * /----------X----> + * ----*---A---B---C--> + * + * [A,C] - incremental chain + * X - the state of pgdata + * C - destination backup + * * - switch point + * + * Incremental restore in shift mode IS NOT POSSIBLE in this case. + * We must be able to differentiate the scenario A and scenario B. + * + */ + if (params->is_restore && params->incremental_mode == INCR_LSN) + { + RedoParams redo; + parray *timelines = NULL; + get_redo(instance_config.pgdata, &redo); + + if (redo.checksum_version == 0) + elog(ERROR, "Incremental restore in 'lsn' mode require " + "data_checksums to be enabled in destination data directory"); + + timelines = read_timeline_history(arclog_path, redo.tli, false); + + if (!timelines) + elog(WARNING, "Failed to get history for redo timeline %i, " + "multi-timeline incremental restore in 'lsn' mode is impossible", redo.tli); + + tmp_backup = dest_backup; + + while (tmp_backup) + { + /* Candidate, whose stop_lsn if less than shift LSN, is found */ + if (tmp_backup->stop_lsn < redo.lsn) + { + /* if candidate timeline is the same as redo TLI, + * then we are good to go. + */ + if (redo.tli == tmp_backup->tli) + { + elog(INFO, "Backup %s is chosen as shiftpoint, its Stop LSN will be used as shift LSN", + base36enc(tmp_backup->start_time)); + + shift_lsn = tmp_backup->stop_lsn; + break; + } + + if (!timelines) + { + elog(WARNING, "Redo timeline %i differs from target timeline %i, " + "in this case, to safely run incremental restore in 'lsn' mode, " + "the history file for timeline %i is mandatory", + redo.tli, tmp_backup->tli, redo.tli); + break; + } + + /* check whether the candidate tli is a part of redo TLI history */ + if (tliIsPartOfHistory(timelines, tmp_backup->tli)) + { + shift_lsn = tmp_backup->stop_lsn; + break; + } + else + elog(INFO, "Backup %s cannot be a shiftpoint, " + "because its tli %i is not in history of redo timeline %i", + base36enc(tmp_backup->start_time), tmp_backup->tli, redo.tli); + } + + tmp_backup = tmp_backup->parent_backup_link; + } + + if (XLogRecPtrIsInvalid(shift_lsn)) + elog(ERROR, "Cannot perform incremental restore of backup chain %s in 'lsn' mode, " + "because destination directory redo point %X/%X on tli %i is out of reach", + base36enc(dest_backup->start_time), + (uint32) (redo.lsn >> 32), (uint32) redo.lsn, redo.tli); + else + elog(INFO, "Destination directory redo point %X/%X on tli %i is " + "within reach of backup %s with Stop LSN %X/%X on tli %i", + (uint32) (redo.lsn >> 32), (uint32) redo.lsn, redo.tli, + base36enc(tmp_backup->start_time), + (uint32) (tmp_backup->stop_lsn >> 32), (uint32) tmp_backup->stop_lsn, + tmp_backup->tli); + + elog(INFO, "shift LSN: %X/%X", + (uint32) (shift_lsn >> 32), (uint32) shift_lsn); + + params->shift_lsn = shift_lsn; + } + + /* for validation or restore with enabled validation */ + if (!params->is_restore || !params->no_validate) { if (dest_backup->backup_mode != BACKUP_MODE_FULL) elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time)); @@ -217,190 +492,408 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, /* * Validate backups from base_full_backup to dest_backup. */ - for (i = base_full_backup_index; i >= dest_backup_index; i--) + for (i = parray_num(parent_chain) - 1; i >= 0; i--) { - pgBackup *backup = (pgBackup *) parray_get(backups, i); + tmp_backup = (pgBackup *) parray_get(parent_chain, i); - pgBackupValidate(backup); - /* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? 
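The shift-LSN selection described above, reduced to a sketch: walk the chain from the destination backup towards its FULL parent and take the Stop LSN of the first backup that lies below the destination directory's redo point on the same timeline. The real code additionally accepts timelines found in the redo timeline's history file; that part is omitted here, and all values are made up:

```c
/* Pick the shift LSN for incremental restore in 'lsn' mode (simplified). */
#include <inttypes.h>
#include <stdio.h>

typedef struct BackupLink
{
    uint64_t           stop_lsn;
    unsigned int       tli;
    struct BackupLink *parent;
} BackupLink;

int
main(void)
{
    /* hypothetical chain: FULL <- PAGE <- DELTA (destination) */
    BackupLink full  = { UINT64_C(0x16A0000028), 1, NULL };
    BackupLink page  = { UINT64_C(0x16B0000060), 1, &full };
    BackupLink delta = { UINT64_C(0x16C00000F0), 1, &page };

    uint64_t     redo_lsn  = UINT64_C(0x16B8000000);  /* redo point of pgdata */
    unsigned int redo_tli  = 1;
    uint64_t     shift_lsn = 0;

    for (BackupLink *b = &delta; b != NULL; b = b->parent)
    {
        if (b->stop_lsn < redo_lsn && b->tli == redo_tli)
        {
            shift_lsn = b->stop_lsn;   /* pages below this LSN can be skipped */
            break;
        }
    }

    printf("shift LSN: %X/%X\n",
           (unsigned int) (shift_lsn >> 32), (unsigned int) shift_lsn);
    return 0;
}
```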
*/ - if (backup->status == BACKUP_STATUS_CORRUPT) + /* Do not interrupt, validate the next backup */ + if (!lock_backup(tmp_backup, true)) { - corrupted_backup = backup; - corrupted_backup_index = i; + if (params->is_restore) + elog(ERROR, "Cannot lock backup %s directory", + base36enc(tmp_backup->start_time)); + else + { + elog(WARNING, "Cannot lock backup %s directory, skip validation", + base36enc(tmp_backup->start_time)); + continue; + } + } + + /* validate datafiles only */ + pgBackupValidate(tmp_backup, params); + + /* After pgBackupValidate() only following backup + * states are possible: ERROR, RUNNING, CORRUPT and OK. + * Validate WAL only for OK, because there is no point + * in WAL validation for corrupted, errored or running backups. + */ + if (tmp_backup->status != BACKUP_STATUS_OK) + { + corrupted_backup = tmp_backup; break; } /* We do not validate WAL files of intermediate backups * It`s done to speed up restore */ } - /* There is no point in wal validation - * if there is corrupted backup between base_backup and dest_backup - */ + + /* There is no point in wal validation of corrupted backups */ + // TODO: there should be a way for a user to request only(!) WAL validation if (!corrupted_backup) + { /* * Validate corresponding WAL files. * We pass base_full_backup timeline as last argument to this function, * because it's needed to form the name of xlog file. */ - validate_wal(dest_backup, arclog_path, rt->recovery_target_time, - rt->recovery_target_xid, rt->recovery_target_lsn, - base_full_backup->tli); - - /* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */ - if (corrupted_backup) - { - for (i = corrupted_backup_index - 1; i >= 0; i--) - { - pgBackup *backup = (pgBackup *) parray_get(backups, i); - /* Mark incremental OK backup as orphan */ - if (backup->backup_mode == BACKUP_MODE_FULL) - break; - if (backup->status != BACKUP_STATUS_OK) - continue; - else - { - char *backup_id, - *corrupted_backup_id; - - backup->status = BACKUP_STATUS_ORPHAN; - pgBackupWriteBackupControlFile(backup); - - backup_id = base36enc_dup(backup->start_time); - corrupted_backup_id = base36enc_dup(corrupted_backup->start_time); - - elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted", - backup_id, corrupted_backup_id); - - free(backup_id); - free(corrupted_backup_id); - } - } + validate_wal(dest_backup, arclog_path, rt->target_time, + rt->target_xid, rt->target_lsn, + dest_backup->tli, instance_config.xlog_seg_size); } + /* Orphanize every OK descendant of corrupted backup */ + else + set_orphan_status(backups, corrupted_backup); } /* * If dest backup is corrupted or was orphaned in previous check * produce corresponding error message */ - if (dest_backup->status == BACKUP_STATUS_OK) + if (dest_backup->status == BACKUP_STATUS_OK || + dest_backup->status == BACKUP_STATUS_DONE) { - if (rt->restore_no_validate) - elog(INFO, "Backup %s is used without validation.", base36enc(dest_backup->start_time)); + if (params->no_validate) + elog(WARNING, "Backup %s is used without validation.", base36enc(dest_backup->start_time)); else elog(INFO, "Backup %s is valid.", base36enc(dest_backup->start_time)); } else if (dest_backup->status == BACKUP_STATUS_CORRUPT) - elog(ERROR, "Backup %s is corrupt.", base36enc(dest_backup->start_time)); + { + if (params->force) + elog(WARNING, "Backup %s is corrupt.", base36enc(dest_backup->start_time)); + else + elog(ERROR, "Backup %s is corrupt.", base36enc(dest_backup->start_time)); + } else if (dest_backup->status 
== BACKUP_STATUS_ORPHAN) - elog(ERROR, "Backup %s is orphan.", base36enc(dest_backup->start_time)); + { + if (params->force) + elog(WARNING, "Backup %s is orphan.", base36enc(dest_backup->start_time)); + else + elog(ERROR, "Backup %s is orphan.", base36enc(dest_backup->start_time)); + } else elog(ERROR, "Backup %s has status: %s", base36enc(dest_backup->start_time), status2str(dest_backup->status)); - /* We ensured that all backups are valid, now restore if required */ - if (is_restore) + /* We ensured that all backups are valid, now restore if required + */ + if (params->is_restore) { - for (i = base_full_backup_index; i >= dest_backup_index; i--) - { - pgBackup *backup = (pgBackup *) parray_get(backups, i); - - if (rt->lsn_specified && parse_server_version(backup->server_version) < 100000) - elog(ERROR, "Backup %s was created for version %s which doesn't support recovery_target_lsn", - base36enc(dest_backup->start_time), dest_backup->server_version); - - restore_backup(backup); - } - /* - * Delete files which are not in dest backup file list. Files which were - * deleted between previous and current backup are not in the list. + * Get a list of dbOids to skip if user requested the partial restore. + * It is important that we do this after(!) validation so + * database_map can be trusted. + * NOTE: database_map could be missing for legal reasons, e.g. missing + * permissions on pg_database during `backup` and, as long as user + * do not request partial restore, it`s OK. + * + * If partial restore is requested and database map doesn't exist, + * throw an error. */ - if (dest_backup->backup_mode != BACKUP_MODE_FULL) - remove_deleted_files(dest_backup); + if (params->partial_db_list) + dbOid_exclude_list = get_dbOid_exclude_list(dest_backup, params->partial_db_list, + params->partial_restore_type); + + if (rt->lsn_string && + parse_server_version(dest_backup->server_version) < 100000) + elog(ERROR, "Backup %s was created for version %s which doesn't support recovery_target_lsn", + base36enc(dest_backup->start_time), + dest_backup->server_version); + + restore_chain(dest_backup, parent_chain, dbOid_exclude_list, + params, instance_config.pgdata, no_sync); /* Create recovery.conf with given recovery target parameters */ - create_recovery_conf(target_backup_id, rt, dest_backup); + create_recovery_conf(target_backup_id, rt, dest_backup, params); } + /* ssh connection to longer needed */ + fio_disconnect(); + + elog(INFO, "%s of backup %s completed.", + action, base36enc(dest_backup->start_time)); + /* cleanup */ parray_walk(backups, pgBackupFree); parray_free(backups); + parray_free(parent_chain); - elog(INFO, "%s of backup %s completed.", - action, base36enc(dest_backup->start_time)); return 0; } /* - * Restore one backup. + * Restore backup chain. 
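The status handling above condenses to a small policy: OK and DONE backups are restorable, CORRUPT and ORPHAN are downgraded to warnings only when --force is given, and anything else is a hard error. A paraphrase for illustration, not the project's actual control flow:

```c
/* Restore admission policy for the destination backup's status. */
#include <stdio.h>

typedef enum { ST_OK, ST_DONE, ST_CORRUPT, ST_ORPHAN, ST_OTHER } Status;

/* returns 1 if restore may proceed, 0 if it must stop */
static int
may_restore(Status st, int force)
{
    switch (st)
    {
        case ST_OK:
        case ST_DONE:
            return 1;
        case ST_CORRUPT:
        case ST_ORPHAN:
            if (force)
            {
                fprintf(stderr, "WARNING: backup is not valid, restore is forced\n");
                return 1;
            }
            /* fall through */
        default:
            fprintf(stderr, "ERROR: backup cannot be restored\n");
            return 0;
    }
}

int
main(void)
{
    printf("%d %d %d\n",
           may_restore(ST_DONE, 0),     /* 1 */
           may_restore(ST_CORRUPT, 1),  /* 1, with a warning */
           may_restore(ST_ORPHAN, 0));  /* 0 */
    return 0;
}
```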
*/ void -restore_backup(pgBackup *backup) +restore_chain(pgBackup *dest_backup, parray *parent_chain, + parray *dbOid_exclude_list, pgRestoreParams *params, + const char *pgdata_path, bool no_sync) { - char timestamp[100]; - char this_backup_path[MAXPGPATH]; - char database_path[MAXPGPATH]; - char list_path[MAXPGPATH]; - parray *files; int i; + char timestamp[100]; + parray *pgdata_files = NULL; + parray *dest_files = NULL; + parray *external_dirs = NULL; /* arrays with meta info for multi threaded backup */ pthread_t *threads; restore_files_arg *threads_args; bool restore_isok = true; + bool use_bitmap = true; + + /* fancy reporting */ + char pretty_dest_bytes[20]; + char pretty_total_bytes[20]; + size_t dest_bytes = 0; + size_t total_bytes = 0; + char pretty_time[20]; + time_t start_time, end_time; + + /* Preparations for actual restoring */ + time2iso(timestamp, lengthof(timestamp), dest_backup->start_time); + elog(INFO, "Restoring the database from backup at %s", timestamp); + + dest_files = get_backup_filelist(dest_backup, true); + + /* Lock backup chain and make sanity checks */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + if (!lock_backup(backup, true)) + elog(ERROR, "Cannot lock backup %s", base36enc(backup->start_time)); + + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE) + { + if (params->force) + elog(WARNING, "Backup %s is not valid, restore is forced", + base36enc(backup->start_time)); + else + elog(ERROR, "Backup %s cannot be restored because it is not valid", + base36enc(backup->start_time)); + } + + /* confirm block size compatibility */ + if (backup->block_size != BLCKSZ) + elog(ERROR, + "BLCKSZ(%d) is not compatible(%d expected)", + backup->block_size, BLCKSZ); - if (backup->status != BACKUP_STATUS_OK) - elog(ERROR, "Backup %s cannot be restored because it is not valid", - base36enc(backup->start_time)); + if (backup->wal_block_size != XLOG_BLCKSZ) + elog(ERROR, + "XLOG_BLCKSZ(%d) is not compatible(%d expected)", + backup->wal_block_size, XLOG_BLCKSZ); + + /* populate backup filelist */ + if (backup->start_time != dest_backup->start_time) + backup->files = get_backup_filelist(backup, true); + else + backup->files = dest_files; + + /* + * this sorting is important, because we rely on it to find + * destination file in intermediate backups file lists + * using bsearch. + */ + parray_qsort(backup->files, pgFileCompareRelPathWithExternal); + } + + /* If dest backup version is older than 2.4.0, then bitmap optimization + * is impossible to use, because bitmap restore rely on pgFile.n_blocks, + * which is not always available in old backups. + */ + if (parse_program_version(dest_backup->program_version) < 20400) + { + use_bitmap = false; - /* confirm block size compatibility */ - if (backup->block_size != BLCKSZ) - elog(ERROR, - "BLCKSZ(%d) is not compatible(%d expected)", - backup->block_size, BLCKSZ); - if (backup->wal_block_size != XLOG_BLCKSZ) - elog(ERROR, - "XLOG_BLCKSZ(%d) is not compatible(%d expected)", - backup->wal_block_size, XLOG_BLCKSZ); + if (params->incremental_mode != INCR_NONE) + elog(ERROR, "incremental restore is not possible for backups older than 2.3.0 version"); + } - time2iso(timestamp, lengthof(timestamp), backup->start_time); - elog(LOG, "restoring database from backup %s", timestamp); + /* There is no point in bitmap restore, when restoring a single FULL backup, + * unless we are running incremental-lsn restore, then bitmap is mandatory. 
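The bitmap gate above compares parse_program_version(...) against 20400, i.e. release 2.4.0. A sketch of one plausible "X.Y.Z" to XYYZZ encoding consistent with that comparison; the project's real parser may treat malformed strings differently:

```c
/* Turn a "major.minor.patch" version string into a comparable number. */
#include <stdio.h>

static unsigned int
program_version_num(const char *version)
{
    unsigned int major = 0, minor = 0, patch = 0;

    if (sscanf(version, "%u.%u.%u", &major, &minor, &patch) < 2)
        return 0;   /* unknown or garbage version string */

    return major * 10000 + minor * 100 + patch;
}

int
main(void)
{
    printf("%u %u\n",
           program_version_num("2.4.0"),   /* 20400 */
           program_version_num("2.2.5"));  /* 20205 */
    return 0;
}
```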
+ */ + if (use_bitmap && parray_num(parent_chain) == 1) + { + if (params->incremental_mode == INCR_NONE) + use_bitmap = false; + else + use_bitmap = true; + } /* - * Restore backup directories. - * this_backup_path = $BACKUP_PATH/backups/instance_name/backup_id + * Restore dest_backup internal directories. */ - pgBackupGetPath(backup, this_backup_path, lengthof(this_backup_path), NULL); - create_data_directories(pgdata, this_backup_path, true); + create_data_directories(dest_files, instance_config.pgdata, + dest_backup->root_dir, true, + params->incremental_mode != INCR_NONE, + FIO_DB_HOST); /* - * Get list of files which need to be restored. + * Restore dest_backup external directories. */ - pgBackupGetPath(backup, database_path, lengthof(database_path), DATABASE_DIR); - pgBackupGetPath(backup, list_path, lengthof(list_path), DATABASE_FILE_LIST); - files = dir_read_file_list(database_path, list_path); + if (dest_backup->external_dir_str && !params->skip_external_dirs) + { + external_dirs = make_external_directory_list(dest_backup->external_dir_str, true); - threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); - threads_args = (restore_files_arg *) palloc(sizeof(restore_files_arg)*num_threads); + if (!external_dirs) + elog(ERROR, "Failed to get a list of external directories"); - /* setup threads */ - for (i = 0; i < parray_num(files); i++) + if (parray_num(external_dirs) > 0) + elog(LOG, "Restore external directories"); + + for (i = 0; i < parray_num(external_dirs); i++) + fio_mkdir(parray_get(external_dirs, i), + DIR_PERMISSION, FIO_DB_HOST); + } + + /* + * Setup directory structure for external directories and file locks + */ + for (i = 0; i < parray_num(dest_files); i++) { - pgFile *file = (pgFile *) parray_get(files, i); + pgFile *file = (pgFile *) parray_get(dest_files, i); + if (S_ISDIR(file->mode)) + total_bytes += 4096; + + if (!params->skip_external_dirs && + file->external_dir_num && S_ISDIR(file->mode)) + { + char *external_path; + char dirpath[MAXPGPATH]; + + if (parray_num(external_dirs) < file->external_dir_num - 1) + elog(ERROR, "Inconsistent external directory backup metadata"); + + external_path = parray_get(external_dirs, file->external_dir_num - 1); + join_path_components(dirpath, external_path, file->rel_path); + + elog(VERBOSE, "Create external directory \"%s\"", dirpath); + fio_mkdir(dirpath, file->mode, FIO_DB_HOST); + } + + /* setup threads */ pg_atomic_clear_flag(&file->lock); } + /* Get list of files in destination directory and remove redundant files */ + if (params->incremental_mode != INCR_NONE) + { + pgdata_files = parray_new(); + + elog(INFO, "Extracting the content of destination directory for incremental restore"); + + time(&start_time); + if (fio_is_remote(FIO_DB_HOST)) + fio_list_dir(pgdata_files, pgdata_path, false, true, false, false, true, 0); + else + dir_list_file(pgdata_files, pgdata_path, + false, true, false, false, true, 0, FIO_LOCAL_HOST); + + /* get external dirs content */ + if (external_dirs) + { + for (i = 0; i < parray_num(external_dirs); i++) + { + char *external_path = parray_get(external_dirs, i); + parray *external_files = parray_new(); + + if (fio_is_remote(FIO_DB_HOST)) + fio_list_dir(external_files, external_path, + false, true, false, false, true, i+1); + else + dir_list_file(external_files, external_path, + false, true, false, false, true, i+1, + FIO_LOCAL_HOST); + + parray_concat(pgdata_files, external_files); + parray_free(external_files); + } + } + + parray_qsort(pgdata_files, pgFileCompareRelPathWithExternalDesc); 
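The descending sort above prepares the cleanup pass that follows: every file found in the destination directory but absent from the backup's file list is removed, deepest paths first so that children go before their parent directories. A libc-only analogue with made-up paths that prints instead of unlinking:

```c
/* Remove files that are not part of the backup manifest (dry-run sketch). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int cmp_asc(const void *a, const void *b)
{
    return strcmp(*(const char * const *) a, *(const char * const *) b);
}

static int cmp_desc(const void *a, const void *b)
{
    return -cmp_asc(a, b);
}

int
main(void)
{
    /* files listed in the backup (the "manifest") */
    const char *manifest[] = { "base/1/112", "base/1/113", "global/pg_control" };
    /* files currently present in the destination directory */
    const char *local[]    = { "base/1/112", "base/1/999", "base/1/113_vm", "global/pg_control" };

    size_t n_manifest = sizeof(manifest) / sizeof(manifest[0]);
    size_t n_local    = sizeof(local) / sizeof(local[0]);

    qsort(manifest, n_manifest, sizeof(manifest[0]), cmp_asc);
    qsort(local, n_local, sizeof(local[0]), cmp_desc);   /* deepest paths first */

    for (size_t i = 0; i < n_local; i++)
    {
        if (!bsearch(&local[i], manifest, n_manifest, sizeof(manifest[0]), cmp_asc))
            printf("would delete redundant file: %s\n", local[i]);
    }
    return 0;
}
```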
+ + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + + elog(INFO, "Destination directory content extracted, time elapsed: %s", + pretty_time); + + elog(INFO, "Removing redundant files in destination directory"); + time(&start_time); + for (i = 0; i < parray_num(pgdata_files); i++) + { + pgFile *file = (pgFile *) parray_get(pgdata_files, i); + + /* if file does not exists in destination list, then we can safely unlink it */ + if (parray_bsearch(dest_backup->files, file, pgFileCompareRelPathWithExternal) == NULL) + { + char fullpath[MAXPGPATH]; + + join_path_components(fullpath, pgdata_path, file->rel_path); + +// fio_pgFileDelete(file, full_file_path); + fio_delete(file->mode, fullpath, FIO_DB_HOST); + elog(VERBOSE, "Deleted file \"%s\"", fullpath); + + /* shrink pgdata list */ + parray_remove(pgdata_files, i); + i--; + } + } + + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + + /* At this point PDATA do not contain files, that do not exists in dest backup file list */ + elog(INFO, "Redundant files are removed, time elapsed: %s", pretty_time); + } + + /* + * Close ssh connection belonging to the main thread + * to avoid the possibility of been killed for idleness + */ + fio_disconnect(); + + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (restore_files_arg *) palloc(sizeof(restore_files_arg) * + num_threads); + if (dest_backup->stream) + dest_bytes = dest_backup->pgdata_bytes + dest_backup->wal_bytes; + else + dest_bytes = dest_backup->pgdata_bytes; + + pretty_size(dest_bytes, pretty_dest_bytes, lengthof(pretty_dest_bytes)); + elog(INFO, "Start restoring backup files. PGDATA size: %s", pretty_dest_bytes); + time(&start_time); + thread_interrupted = false; + /* Restore files into target directory */ for (i = 0; i < num_threads; i++) { restore_files_arg *arg = &(threads_args[i]); - arg->files = files; - arg->backup = backup; + arg->dest_files = dest_files; + arg->pgdata_files = pgdata_files; + arg->dest_backup = dest_backup; + arg->dest_external_dirs = external_dirs; + arg->parent_chain = parent_chain; + arg->dbOid_exclude_list = dbOid_exclude_list; + arg->skip_external_dirs = params->skip_external_dirs; + arg->to_root = pgdata_path; + arg->use_bitmap = use_bitmap; + arg->incremental_mode = params->incremental_mode; + arg->shift_lsn = params->shift_lsn; + threads_args[i].restored_bytes = 0; /* By default there are some error */ threads_args[i].ret = 1; - elog(LOG, "Start thread for num:%li", parray_num(files)); + /* Useless message TODO: rewrite */ + elog(LOG, "Start thread %i", i + 1); pthread_create(&threads[i], NULL, restore_files, arg); } @@ -411,64 +904,101 @@ restore_backup(pgBackup *backup) pthread_join(threads[i], NULL); if (threads_args[i].ret == 1) restore_isok = false; - } - if (!restore_isok) - elog(ERROR, "Data files restoring failed"); - pfree(threads); - pfree(threads_args); - - /* cleanup */ - parray_walk(files, pgFileFree); - parray_free(files); + total_bytes += threads_args[i].restored_bytes; + } - if (log_level_console <= LOG || log_level_file <= LOG) - elog(LOG, "restore %s backup completed", base36enc(backup->start_time)); -} + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + pretty_size(total_bytes, pretty_total_bytes, lengthof(pretty_total_bytes)); -/* - * Delete files which are not in backup's file list from target pgdata. 
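The thread setup shown above follows a common pattern: each worker gets its own argument block whose ret defaults to failure, files are claimed with an atomic test-and-set (as restore_files() does with pg_atomic_test_set_flag), and the main thread aggregates byte counts and return codes after pthread_join. A self-contained sketch of that pattern with made-up sizes and counts (compile with -pthread):

```c
/* Fan out work to N threads, claim items atomically, aggregate results. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define N_FILES   8
#define N_THREADS 3

static atomic_flag locks[N_FILES] = { ATOMIC_FLAG_INIT };

typedef struct
{
    int    ret;              /* 1 = error (the default), 0 = success */
    size_t restored_bytes;
} worker_arg;

static void *
worker(void *arg)
{
    worker_arg *wa = (worker_arg *) arg;

    for (int i = 0; i < N_FILES; i++)
    {
        if (atomic_flag_test_and_set(&locks[i]))
            continue;                    /* another thread already took this file */
        wa->restored_bytes += 8192;      /* pretend we restored one page */
    }

    wa->ret = 0;
    return NULL;
}

int
main(void)
{
    pthread_t  threads[N_THREADS];
    worker_arg args[N_THREADS] = { { 1, 0 }, { 1, 0 }, { 1, 0 } };
    size_t     total = 0;
    int        ok = 1;

    for (int i = 0; i < N_THREADS; i++)
        pthread_create(&threads[i], NULL, worker, &args[i]);

    for (int i = 0; i < N_THREADS; i++)
    {
        pthread_join(threads[i], NULL);
        if (args[i].ret == 1)
            ok = 0;
        total += args[i].restored_bytes;
    }

    printf("%s, restored %zu bytes\n", ok ? "OK" : "FAILED", total);
    return 0;
}
```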
- * It is necessary to restore incremental backup correctly. - * Files which were deleted between previous and current backup - * are not in the backup's filelist. - */ -static void -remove_deleted_files(pgBackup *backup) -{ - parray *files; - parray *files_restored; - char filelist_path[MAXPGPATH]; - int i; + if (restore_isok) + { + elog(INFO, "Backup files are restored. Transfered bytes: %s, time elapsed: %s", + pretty_total_bytes, pretty_time); - pgBackupGetPath(backup, filelist_path, lengthof(filelist_path), DATABASE_FILE_LIST); - /* Read backup's filelist using target database path as base path */ - files = dir_read_file_list(pgdata, filelist_path); - parray_qsort(files, pgFileComparePathDesc); + elog(INFO, "Restore incremental ratio (less is better): %.f%% (%s/%s)", + ((float) total_bytes / dest_bytes) * 100, + pretty_total_bytes, pretty_dest_bytes); + } + else + elog(ERROR, "Backup files restoring failed. Transfered bytes: %s, time elapsed: %s", + pretty_total_bytes, pretty_time); - /* Get list of files actually existing in target database */ - files_restored = parray_new(); - dir_list_file(files_restored, pgdata, true, true, false); - /* To delete from leaf, sort in reversed order */ - parray_qsort(files_restored, pgFileComparePathDesc); + /* Close page header maps */ + for (i = parray_num(parent_chain) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + cleanup_header_map(&(backup->hdr_map)); + } - for (i = 0; i < parray_num(files_restored); i++) + if (no_sync) + elog(WARNING, "Restored files are not synced to disk"); + else { - pgFile *file = (pgFile *) parray_get(files_restored, i); + elog(INFO, "Syncing restored files to disk"); + time(&start_time); - /* If the file is not in the file list, delete it */ - if (parray_bsearch(files, file, pgFileComparePathDesc) == NULL) + for (i = 0; i < parray_num(dest_files); i++) { - pgFileDelete(file); - if (log_level_console <= LOG || log_level_file <= LOG) - elog(LOG, "deleted %s", GetRelativePath(file->path, pgdata)); + char to_fullpath[MAXPGPATH]; + pgFile *dest_file = (pgFile *) parray_get(dest_files, i); + + if (S_ISDIR(dest_file->mode)) + continue; + + /* skip external files if ordered to do so */ + if (dest_file->external_dir_num > 0 && + params->skip_external_dirs) + continue; + + /* construct fullpath */ + if (dest_file->external_dir_num == 0) + { + if (strcmp(PG_TABLESPACE_MAP_FILE, dest_file->rel_path) == 0) + continue; + if (strcmp(DATABASE_MAP, dest_file->rel_path) == 0) + continue; + join_path_components(to_fullpath, pgdata_path, dest_file->rel_path); + } + else + { + char *external_path = parray_get(external_dirs, dest_file->external_dir_num - 1); + join_path_components(to_fullpath, external_path, dest_file->rel_path); + } + + /* TODO: write test for case: file to be synced is missing */ + if (fio_sync(to_fullpath, FIO_DB_HOST) != 0) + elog(ERROR, "Failed to sync file \"%s\": %s", to_fullpath, strerror(errno)); } + + time(&end_time); + pretty_time_interval(difftime(end_time, start_time), + pretty_time, lengthof(pretty_time)); + elog(INFO, "Restored backup files are synced, time elapsed: %s", pretty_time); } /* cleanup */ - parray_walk(files, pgFileFree); - parray_free(files); - parray_walk(files_restored, pgFileFree); - parray_free(files_restored); + pfree(threads); + pfree(threads_args); + + if (external_dirs != NULL) + free_dir_list(external_dirs); + + if (pgdata_files) + { + parray_walk(pgdata_files, pgFileFree); + parray_free(pgdata_files); + } + + for (i = parray_num(parent_chain) - 1; i 
>= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); + + parray_walk(backup->files, pgFileFree); + parray_free(backup->files); + } } /* @@ -477,172 +1007,506 @@ remove_deleted_files(pgBackup *backup) static void * restore_files(void *arg) { - int i; - restore_files_arg *arguments = (restore_files_arg *)arg; + int i; + uint64 n_files; + char to_fullpath[MAXPGPATH]; + FILE *out = NULL; + char *out_buf = pgut_malloc(STDIO_BUFSIZE); + + restore_files_arg *arguments = (restore_files_arg *) arg; - for (i = 0; i < parray_num(arguments->files); i++) + n_files = (unsigned long) parray_num(arguments->dest_files); + + for (i = 0; i < parray_num(arguments->dest_files); i++) { - char from_root[MAXPGPATH]; - char *rel_path; - pgFile *file = (pgFile *) parray_get(arguments->files, i); + bool already_exists = false; + PageState *checksum_map = NULL; /* it should take ~1.5MB at most */ + datapagemap_t *lsn_map = NULL; /* it should take 16kB at most */ + pgFile *dest_file = (pgFile *) parray_get(arguments->dest_files, i); - if (!pg_atomic_test_set_flag(&file->lock)) + /* Directories were created before */ + if (S_ISDIR(dest_file->mode)) continue; - pgBackupGetPath(arguments->backup, from_root, - lengthof(from_root), DATABASE_DIR); + if (!pg_atomic_test_set_flag(&dest_file->lock)) + continue; /* check for interrupt */ - if (interrupted) - elog(ERROR, "interrupted during restore database"); - - rel_path = GetRelativePath(file->path,from_root); + if (interrupted || thread_interrupted) + elog(ERROR, "Interrupted during restore"); if (progress) - elog(LOG, "Progress: (%d/%lu). Process file %s ", - i + 1, (unsigned long) parray_num(arguments->files), rel_path); + elog(INFO, "Progress: (%d/%lu). Restore file \"%s\"", + i + 1, n_files, dest_file->rel_path); - /* - * For PAGE and PTRACK backups skip files which haven't changed - * since previous backup and thus were not backed up. - * We cannot do the same when restoring DELTA backup because we need information - * about every file to correctly truncate them. - */ - if (file->write_size == BYTES_INVALID && - (arguments->backup->backup_mode == BACKUP_MODE_DIFF_PAGE - || arguments->backup->backup_mode == BACKUP_MODE_DIFF_PTRACK)) + /* Only files from pgdata can be skipped by partial restore */ + if (arguments->dbOid_exclude_list && dest_file->external_dir_num == 0) { - elog(VERBOSE, "The file didn`t change. Skip restore: %s", file->path); - continue; + /* Check if the file belongs to the database we exclude */ + if (parray_bsearch(arguments->dbOid_exclude_list, + &dest_file->dbOid, pgCompareOid)) + { + /* + * We cannot simply skip the file, because it may lead to + * failure during WAL redo; hence, create empty file. 
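The partial-restore check above reduces to a membership test of the file's dbOid in a sorted array. A minimal standalone model of that lookup, using the C library qsort/bsearch pair in place of parray_qsort/parray_bsearch and a comparator playing the role of pgCompareOid (the sample Oids are illustrative, not real catalog values):

```
/* Minimal model of the dbOid exclusion check: the exclude list is a
 * sorted array of Oids and each file's dbOid is looked up with a
 * binary search. Names here are illustrative, not the pg_probackup API. */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned int Oid;

static int
compare_oid(const void *a, const void *b)
{
    Oid lhs = *(const Oid *) a;
    Oid rhs = *(const Oid *) b;

    if (lhs < rhs)
        return -1;
    if (lhs > rhs)
        return 1;
    return 0;
}

int
main(void)
{
    /* dbOids of databases excluded from restore (unsorted on purpose) */
    Oid     exclude[] = {16384, 13593, 16402};
    size_t  n = sizeof(exclude) / sizeof(exclude[0]);
    Oid     file_db_oid = 16384;    /* dbOid taken from a file's metadata */

    qsort(exclude, n, sizeof(Oid), compare_oid);

    if (bsearch(&file_db_oid, exclude, n, sizeof(Oid), compare_oid))
        printf("dbOid %u: restore as an empty file\n", file_db_oid);
    else
        printf("dbOid %u: restore normally\n", file_db_oid);
    return 0;
}
```

Sorting the exclude list once up front (as get_dbOid_exclude_list does) keeps each per-file check at O(log n), which matters because every worker thread runs this test for every file in the backup.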
+ */ + create_empty_file(FIO_BACKUP_HOST, + arguments->to_root, FIO_DB_HOST, dest_file); + + elog(VERBOSE, "Skip file due to partial restore: \"%s\"", + dest_file->rel_path); + continue; + } } - /* Directories were created before */ - if (S_ISDIR(file->mode)) + /* Do not restore tablespace_map file */ + if ((dest_file->external_dir_num == 0) && + strcmp(PG_TABLESPACE_MAP_FILE, dest_file->rel_path) == 0) { - elog(VERBOSE, "directory, skip"); + elog(VERBOSE, "Skip tablespace_map"); continue; } - /* Do not restore tablespace_map file */ - if (path_is_prefix_of_path(PG_TABLESPACE_MAP_FILE, rel_path)) + /* Do not restore database_map file */ + if ((dest_file->external_dir_num == 0) && + strcmp(DATABASE_MAP, dest_file->rel_path) == 0) { - elog(VERBOSE, "skip tablespace_map"); + elog(VERBOSE, "Skip database_map"); + continue; + } + + /* Do no restore external directory file if a user doesn't want */ + if (arguments->skip_external_dirs && dest_file->external_dir_num > 0) continue; + + /* set fullpath of destination file */ + if (dest_file->external_dir_num == 0) + join_path_components(to_fullpath, arguments->to_root, dest_file->rel_path); + else + { + char *external_path = parray_get(arguments->dest_external_dirs, + dest_file->external_dir_num - 1); + join_path_components(to_fullpath, external_path, dest_file->rel_path); + } + + if (arguments->incremental_mode != INCR_NONE && + parray_bsearch(arguments->pgdata_files, dest_file, pgFileCompareRelPathWithExternalDesc)) + { + already_exists = true; } /* - * restore the file. - * We treat datafiles separately, cause they were backed up block by - * block and have BackupPageHeader meta information, so we cannot just - * copy the file from backup. + * Handle incremental restore case for data files. + * If file is already exists in pgdata, then + * we scan it block by block and get + * array of checksums for every page. */ - elog(VERBOSE, "Restoring file %s, is_datafile %i, is_cfs %i", - file->path, file->is_datafile?1:0, file->is_cfs?1:0); - if (file->is_datafile && !file->is_cfs) + if (already_exists && + dest_file->is_datafile && !dest_file->is_cfs && + dest_file->n_blocks > 0) { - char to_path[MAXPGPATH]; + if (arguments->incremental_mode == INCR_LSN) + { + lsn_map = fio_get_lsn_map(to_fullpath, arguments->dest_backup->checksum_version, + dest_file->n_blocks, arguments->shift_lsn, + dest_file->segno * RELSEG_SIZE, FIO_DB_HOST); + } + else if (arguments->incremental_mode == INCR_CHECKSUM) + { + checksum_map = fio_get_checksum_map(to_fullpath, arguments->dest_backup->checksum_version, + dest_file->n_blocks, arguments->dest_backup->stop_lsn, + dest_file->segno * RELSEG_SIZE, FIO_DB_HOST); + } + } - join_path_components(to_path, pgdata, - file->path + strlen(from_root) + 1); - restore_data_file(to_path, file, - arguments->backup->backup_mode == BACKUP_MODE_DIFF_DELTA, - false); + /* + * Open dest file and truncate it to zero, if destination + * file already exists and dest file size is zero, or + * if file do not exist + */ + if ((already_exists && dest_file->write_size == 0) || !already_exists) + out = fio_fopen(to_fullpath, PG_BINARY_W, FIO_DB_HOST); + /* + * If file already exists and dest size is not zero, + * then open it for reading and writing. 
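The open-mode policy described in the surrounding comments can be summarized in a few lines. A sketch under the assumption that plain stdio stands in for the fio_* remote-aware wrappers, and that already_exists and recorded_size mirror the values pg_probackup derives from pgdata_files and the backup filelist:

```
#include <stdio.h>

/* Sketch only: truncate when the file is new or recorded as empty,
 * otherwise open read/write so unchanged pages can be left in place. */
FILE *
open_restore_target(const char *path, int already_exists, long recorded_size)
{
    /*
     * New file, or a file whose backed-up size is zero: open with "wb"
     * so any stale content is truncated away.
     */
    if (!already_exists || recorded_size == 0)
        return fopen(path, "wb");

    /*
     * Incremental case: the file is already present and non-empty,
     * so open it read/write and only rewrite the pages that differ.
     */
    return fopen(path, "rb+");
}
```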
+ */ + else + out = fio_fopen(to_fullpath, PG_BINARY_R "+", FIO_DB_HOST); + + if (out == NULL) + elog(ERROR, "Cannot open restore target file \"%s\": %s", + to_fullpath, strerror(errno)); + + /* update file permission */ + if (fio_chmod(to_fullpath, dest_file->mode, FIO_DB_HOST) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, + strerror(errno)); + + if (!dest_file->is_datafile || dest_file->is_cfs) + elog(VERBOSE, "Restoring nonedata file: \"%s\"", to_fullpath); + else + elog(VERBOSE, "Restoring data file: \"%s\"", to_fullpath); + + // If destination file is 0 sized, then just close it and go for the next + if (dest_file->write_size == 0) + goto done; + + /* Restore destination file */ + if (dest_file->is_datafile && !dest_file->is_cfs) + { + /* enable stdio buffering for local destination data file */ + if (!fio_is_remote_file(out)) + setvbuf(out, out_buf, _IOFBF, STDIO_BUFSIZE); + /* Destination file is data file */ + arguments->restored_bytes += restore_data_file(arguments->parent_chain, + dest_file, out, to_fullpath, + arguments->use_bitmap, checksum_map, + arguments->shift_lsn, lsn_map, true); } else - copy_file(from_root, pgdata, file); + { + /* disable stdio buffering for local destination nonedata file */ + if (!fio_is_remote_file(out)) + setvbuf(out, NULL, _IONBF, BUFSIZ); + /* Destination file is nonedata file */ + arguments->restored_bytes += restore_non_data_file(arguments->parent_chain, + arguments->dest_backup, dest_file, out, to_fullpath, + already_exists); + } + +done: + /* close file */ + if (fio_fclose(out) != 0) + elog(ERROR, "Cannot close file \"%s\": %s", to_fullpath, + strerror(errno)); + + /* free pagemap used for restore optimization */ + pg_free(dest_file->pagemap.bitmap); - /* print size of restored file */ - if (file->write_size != BYTES_INVALID) - elog(LOG, "Restored file %s : " INT64_FORMAT " bytes", - file->path, file->write_size); + if (lsn_map) + pg_free(lsn_map->bitmap); + + pg_free(lsn_map); + pg_free(checksum_map); } + free(out_buf); + + /* ssh connection to longer needed */ + fio_disconnect(); + /* Data files restoring is successful */ arguments->ret = 0; return NULL; } -/* Create recovery.conf with given recovery target parameters */ +/* + * Create recovery.conf (probackup_recovery.conf in case of PG12) + * with given recovery target parameters + */ static void create_recovery_conf(time_t backup_id, pgRecoveryTarget *rt, - pgBackup *backup) + pgBackup *backup, + pgRestoreParams *params) { char path[MAXPGPATH]; FILE *fp; - bool need_restore_conf = false; + bool pitr_requested; + bool target_latest; + bool target_immediate; + bool restore_command_provided = false; + char restore_command_guc[16384]; + + if (instance_config.restore_command && + (pg_strcasecmp(instance_config.restore_command, "none") != 0)) + { + restore_command_provided = true; + } - if (!backup->stream - || (rt->time_specified || rt->xid_specified)) - need_restore_conf = true; + /* restore-target='latest' support */ + target_latest = rt->target_stop != NULL && + strcmp(rt->target_stop, "latest") == 0; + + target_immediate = rt->target_stop != NULL && + strcmp(rt->target_stop, "immediate") == 0; + + /* + * Note that setting restore_command alone interpreted + * as PITR with target - "until all available WAL is replayed". + * We do this because of the following case: + * The user is restoring STREAM backup as replica but + * also relies on WAL archive to catch-up with master. + * If restore_command is provided, then it should be + * added to recovery config. 
+ * In this scenario, "would be" replica will replay + * all WAL segments available in WAL archive, after that + * it will try to connect to master via repprotocol. + * + * The risk is obvious, what if masters current state is + * in "the past" relatively to latest state in the archive? + * We will get a replica that is "in the future" to the master. + * We accept this risk because its probability is low. + */ + pitr_requested = !backup->stream || rt->time_string || + rt->xid_string || rt->lsn_string || rt->target_name || + target_immediate || target_latest || restore_command_provided; /* No need to generate recovery.conf at all. */ - if (!(need_restore_conf || restore_as_replica)) + if (!(pitr_requested || params->restore_as_replica)) + { + /* + * Restoring STREAM backup without PITR and not as replica, + * recovery.signal and standby.signal for PG12 are not needed + * + * We do not add "include" option in this case because + * here we are creating empty "probackup_recovery.conf" + * to handle possible already existing "include" + * directive pointing to "probackup_recovery.conf". + * If don`t do that, recovery will fail. + */ + pg12_recovery_config(backup, false); return; + } elog(LOG, "----------------------------------------"); +#if PG_VERSION_NUM >= 120000 + elog(LOG, "creating probackup_recovery.conf"); + pg12_recovery_config(backup, true); + snprintf(path, lengthof(path), "%s/probackup_recovery.conf", instance_config.pgdata); +#else elog(LOG, "creating recovery.conf"); + snprintf(path, lengthof(path), "%s/recovery.conf", instance_config.pgdata); +#endif - snprintf(path, lengthof(path), "%s/recovery.conf", pgdata); - fp = fopen(path, "wt"); + fp = fio_fopen(path, "w", FIO_DB_HOST); if (fp == NULL) - elog(ERROR, "cannot open recovery.conf \"%s\": %s", path, + elog(ERROR, "cannot open file \"%s\": %s", path, strerror(errno)); - fprintf(fp, "# recovery.conf generated by pg_probackup %s\n", - PROGRAM_VERSION); + if (fio_chmod(path, FILE_PERMISSION, FIO_DB_HOST) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", path, strerror(errno)); - if (need_restore_conf) +#if PG_VERSION_NUM >= 120000 + fio_fprintf(fp, "# probackup_recovery.conf generated by pg_probackup %s\n", + PROGRAM_VERSION); +#else + fio_fprintf(fp, "# recovery.conf generated by pg_probackup %s\n", + PROGRAM_VERSION); +#endif + + /* construct restore_command */ + if (pitr_requested) { + fio_fprintf(fp, "\n## recovery settings\n"); + /* If restore_command is provided, use it. Otherwise construct it from scratch. */ + if (restore_command_provided) + sprintf(restore_command_guc, "%s", instance_config.restore_command); + else + { + /* default cmdline, ok for local restore */ + sprintf(restore_command_guc, "%s archive-get -B %s --instance %s " + "--wal-file-path=%%p --wal-file-name=%%f", + PROGRAM_FULL_PATH ? 
PROGRAM_FULL_PATH : PROGRAM_NAME, + backup_path, instance_name); + + /* append --remote-* parameters provided via --archive-* settings */ + if (instance_config.archive.host) + { + strcat(restore_command_guc, " --remote-host="); + strcat(restore_command_guc, instance_config.archive.host); + } - fprintf(fp, "restore_command = '%s archive-get -B %s --instance %s " - "--wal-file-path %%p --wal-file-name %%f'\n", - PROGRAM_NAME, backup_path, instance_name); + if (instance_config.archive.port) + { + strcat(restore_command_guc, " --remote-port="); + strcat(restore_command_guc, instance_config.archive.port); + } + + if (instance_config.archive.user) + { + strcat(restore_command_guc, " --remote-user="); + strcat(restore_command_guc, instance_config.archive.user); + } + } /* * We've already checked that only one of the four following mutually * exclusive options is specified, so the order of calls is insignificant. */ - if (rt->recovery_target_name) - fprintf(fp, "recovery_target_name = '%s'\n", rt->recovery_target_name); + if (rt->target_name) + fio_fprintf(fp, "recovery_target_name = '%s'\n", rt->target_name); - if (rt->time_specified) - fprintf(fp, "recovery_target_time = '%s'\n", rt->target_time_string); + if (rt->time_string) + fio_fprintf(fp, "recovery_target_time = '%s'\n", rt->time_string); - if (rt->xid_specified) - fprintf(fp, "recovery_target_xid = '%s'\n", rt->target_xid_string); + if (rt->xid_string) + fio_fprintf(fp, "recovery_target_xid = '%s'\n", rt->xid_string); - if (rt->recovery_target_lsn) - fprintf(fp, "recovery_target_lsn = '%s'\n", rt->target_lsn_string); + if (rt->lsn_string) + fio_fprintf(fp, "recovery_target_lsn = '%s'\n", rt->lsn_string); - if (rt->recovery_target_immediate) - fprintf(fp, "recovery_target = 'immediate'\n"); + if (rt->target_stop && target_immediate) + fio_fprintf(fp, "recovery_target = '%s'\n", rt->target_stop); if (rt->inclusive_specified) - fprintf(fp, "recovery_target_inclusive = '%s'\n", - rt->recovery_target_inclusive?"true":"false"); + fio_fprintf(fp, "recovery_target_inclusive = '%s'\n", + rt->target_inclusive ? "true" : "false"); - if (rt->recovery_target_tli) - fprintf(fp, "recovery_target_timeline = '%u'\n", rt->recovery_target_tli); + if (rt->target_tli) + fio_fprintf(fp, "recovery_target_timeline = '%u'\n", rt->target_tli); + else + { + /* + * In PG12 default recovery target timeline was changed to 'latest', which + * is extremely risky. Explicitly preserve old behavior of recovering to current + * timneline for PG12. 
+ */ +#if PG_VERSION_NUM >= 120000 + fio_fprintf(fp, "recovery_target_timeline = 'current'\n"); +#endif + } + + if (rt->target_action) + fio_fprintf(fp, "recovery_target_action = '%s'\n", rt->target_action); + else + /* default recovery_target_action is 'pause' */ + fio_fprintf(fp, "recovery_target_action = '%s'\n", "pause"); + } - if (rt->recovery_target_action) - fprintf(fp, "recovery_target_action = '%s'\n", rt->recovery_target_action); + if (pitr_requested) + { + elog(LOG, "Setting restore_command to '%s'", restore_command_guc); + fio_fprintf(fp, "restore_command = '%s'\n", restore_command_guc); } - if (restore_as_replica) + if (params->restore_as_replica) { - fprintf(fp, "standby_mode = 'on'\n"); + fio_fprintf(fp, "\n## standby settings\n"); + /* standby_mode was removed in PG12 */ +#if PG_VERSION_NUM < 120000 + fio_fprintf(fp, "standby_mode = 'on'\n"); +#endif - if (backup->primary_conninfo) - fprintf(fp, "primary_conninfo = '%s'\n", backup->primary_conninfo); + if (params->primary_conninfo) + fio_fprintf(fp, "primary_conninfo = '%s'\n", params->primary_conninfo); + else if (backup->primary_conninfo) + fio_fprintf(fp, "primary_conninfo = '%s'\n", backup->primary_conninfo); + + if (params->primary_slot_name != NULL) + fio_fprintf(fp, "primary_slot_name = '%s'\n", params->primary_slot_name); } - if (fflush(fp) != 0 || - fsync(fileno(fp)) != 0 || - fclose(fp)) - elog(ERROR, "cannot write recovery.conf \"%s\": %s", path, + if (fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "cannot write file \"%s\": %s", path, strerror(errno)); + +#if PG_VERSION_NUM >= 120000 + /* + * Create "recovery.signal" to mark this recovery as PITR for PostgreSQL. + * In older versions presense of recovery.conf alone was enough. + * To keep behaviour consistent with older versions, + * we are forced to create "recovery.signal" + * even when only restore_command is provided. + * Presense of "recovery.signal" by itself determine only + * one thing: do PostgreSQL must switch to a new timeline + * after successfull recovery or not? + */ + if (pitr_requested) + { + elog(LOG, "creating recovery.signal file"); + snprintf(path, lengthof(path), "%s/recovery.signal", instance_config.pgdata); + + fp = fio_fopen(path, "w", FIO_DB_HOST); + if (fp == NULL) + elog(ERROR, "cannot open file \"%s\": %s", path, + strerror(errno)); + + if (fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "cannot write file \"%s\": %s", path, + strerror(errno)); + } + + if (params->restore_as_replica) + { + elog(LOG, "creating standby.signal file"); + snprintf(path, lengthof(path), "%s/standby.signal", instance_config.pgdata); + + fp = fio_fopen(path, "w", FIO_DB_HOST); + if (fp == NULL) + elog(ERROR, "cannot open file \"%s\": %s", path, + strerror(errno)); + + if (fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "cannot write file \"%s\": %s", path, + strerror(errno)); + } +#endif +} + +/* + * Create empty probackup_recovery.conf in PGDATA and + * add "include" directive to postgresql.auto.conf + + * When restoring PG12 we always(!) must do this, even + * when restoring STREAM backup without PITR or replica options + * because restored instance may have been previously backed up + * and restored again and user didn`t cleaned up postgresql.auto.conf. + + * So for recovery to work regardless of all this factors + * we must always create empty probackup_recovery.conf file. 
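For orientation, the recovery settings emitted by create_recovery_conf() end up looking roughly like the snippet below; the paths, instance name and target values are illustrative. On PostgreSQL 12 these lines go into probackup_recovery.conf and are accompanied by recovery.signal (plus standby.signal when restoring as a replica), while on older versions they land in recovery.conf; the recovery_target_timeline line is written only for PostgreSQL 12.

```
## recovery settings
recovery_target_time = '2019-11-20 12:00:00'
recovery_target_inclusive = 'true'
recovery_target_timeline = 'current'
recovery_target_action = 'pause'
restore_command = 'pg_probackup archive-get -B /mnt/backups --instance node --wal-file-path=%p --wal-file-name=%f'
```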
+ */ +static void +pg12_recovery_config(pgBackup *backup, bool add_include) +{ +#if PG_VERSION_NUM >= 120000 + char probackup_recovery_path[MAXPGPATH]; + char postgres_auto_path[MAXPGPATH]; + FILE *fp; + + if (add_include) + { + char current_time_str[100]; + + time2iso(current_time_str, lengthof(current_time_str), current_time); + + snprintf(postgres_auto_path, lengthof(postgres_auto_path), + "%s/postgresql.auto.conf", instance_config.pgdata); + + fp = fio_fopen(postgres_auto_path, "a", FIO_DB_HOST); + if (fp == NULL) + elog(ERROR, "cannot write to file \"%s\": %s", postgres_auto_path, + strerror(errno)); + + // TODO: check if include 'probackup_recovery.conf' already exists + fio_fprintf(fp, "\n# created by pg_probackup restore of backup %s at '%s'\n", + base36enc(backup->start_time), current_time_str); + fio_fprintf(fp, "include '%s'\n", "probackup_recovery.conf"); + + if (fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "cannot write to file \"%s\": %s", postgres_auto_path, + strerror(errno)); + } + + /* Create empty probackup_recovery.conf */ + snprintf(probackup_recovery_path, lengthof(probackup_recovery_path), + "%s/probackup_recovery.conf", instance_config.pgdata); + fp = fio_fopen(probackup_recovery_path, "w", FIO_DB_HOST); + if (fp == NULL) + elog(ERROR, "cannot open file \"%s\": %s", probackup_recovery_path, + strerror(errno)); + + if (fio_fflush(fp) != 0 || + fio_fclose(fp)) + elog(ERROR, "cannot write to file \"%s\": %s", probackup_recovery_path, + strerror(errno)); +#endif + return; } /* @@ -655,7 +1519,7 @@ create_recovery_conf(time_t backup_id, * based on readTimeLineHistory() in timeline.c */ parray * -readTimeLineHistory_probackup(TimeLineID targetTLI) +read_timeline_history(const char *arclog_path, TimeLineID targetTLI, bool strict) { parray *result; char path[MAXPGPATH]; @@ -679,8 +1543,11 @@ readTimeLineHistory_probackup(TimeLineID targetTLI) strerror(errno)); /* There is no history file for target timeline */ - elog(ERROR, "recovery target timeline %u does not exist", - targetTLI); + if (strict) + elog(ERROR, "recovery target timeline %u does not exist", + targetTLI); + else + return NULL; } } @@ -734,6 +1601,9 @@ readTimeLineHistory_probackup(TimeLineID targetTLI) /* we ignore the remainder of each line */ } + if (fd && (ferror(fd))) + elog(ERROR, "Failed to read from file: \"%s\"", path); + if (fd) fclose(fd); @@ -744,28 +1614,29 @@ readTimeLineHistory_probackup(TimeLineID targetTLI) entry = pgut_new(TimeLineHistoryEntry); entry->tli = targetTLI; /* LSN in target timeline is valid */ - /* TODO ensure that -1UL --> -1L fix is correct */ - entry->end = (uint32) (-1L << 32) | -1L; + entry->end = InvalidXLogRecPtr; parray_insert(result, 0, entry); return result; } +/* TODO: do not ignore timelines. What if requested target located in different timeline? 
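The hand-off that pg12_recovery_config() sets up is easiest to see on disk: postgresql.auto.conf gains an include directive, and probackup_recovery.conf (possibly empty) is the file that create_recovery_conf() fills in. The backup id and timestamp below are illustrative:

```
# appended to postgresql.auto.conf
# created by pg_probackup restore of backup QZRNW8 at '2019-11-20 12:00:00'
include 'probackup_recovery.conf'
```

Writing the empty probackup_recovery.conf even when no recovery options were requested keeps an include directive left over from an earlier restore from pointing at a file that no longer exists.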
*/ bool satisfy_recovery_target(const pgBackup *backup, const pgRecoveryTarget *rt) { - if (rt->xid_specified) - return backup->recovery_xid <= rt->recovery_target_xid; + if (rt->xid_string) + return backup->recovery_xid <= rt->target_xid; - if (rt->time_specified) - return backup->recovery_time <= rt->recovery_target_time; + if (rt->time_string) + return backup->recovery_time <= rt->target_time; - if (rt->lsn_specified) - return backup->stop_lsn <= rt->recovery_target_lsn; + if (rt->lsn_string) + return backup->stop_lsn <= rt->target_lsn; return true; } +/* TODO description */ bool satisfy_timeline(const parray *timelines, const pgBackup *backup) { @@ -777,11 +1648,34 @@ satisfy_timeline(const parray *timelines, const pgBackup *backup) timeline = (TimeLineHistoryEntry *) parray_get(timelines, i); if (backup->tli == timeline->tli && - backup->stop_lsn < timeline->end) + (XLogRecPtrIsInvalid(timeline->end) || + backup->stop_lsn <= timeline->end)) return true; } return false; } + +/* timelines represents a history of one particular timeline, + * we must determine whether a target tli is part of that history. + * + * /--------* + * ---------*--------------> + */ +bool +tliIsPartOfHistory(const parray *timelines, TimeLineID tli) +{ + int i; + + for (i = 0; i < parray_num(timelines); i++) + { + TimeLineHistoryEntry *timeline = (TimeLineHistoryEntry *) parray_get(timelines, i); + + if (tli == timeline->tli) + return true; + } + + return false; +} /* * Get recovery options in the string format, parse them * and fill up the pgRecoveryTarget structure. @@ -792,15 +1686,11 @@ parseRecoveryTargetOptions(const char *target_time, const char *target_inclusive, TimeLineID target_tli, const char *target_lsn, - bool target_immediate, + const char *target_stop, const char *target_name, - const char *target_action, - bool restore_no_validate) + const char *target_action) { - time_t dummy_time; - TransactionId dummy_xid; bool dummy_bool; - XLogRecPtr dummy_lsn; /* * count the number of the mutually exclusive options which may specify * recovery target. If final value > 1, throw an error. 
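To make the timeline check concrete, here is a self-contained model of the history array that read_timeline_history() builds: every ancestor timeline carries the LSN at which it was switched away from, and the final entry for the target timeline is left open-ended (InvalidXLogRecPtr), so any stop LSN on it qualifies. The types and sample values are illustrative, not the pg_probackup structures themselves.

```
/* Simplified model of the timeline-history check. A history file like
 *
 *     1   0/6000A58   no recovery target specified
 *     2   0/7000000   no recovery target specified
 *
 * yields entries {tli=1, end=0/6000A58}, {tli=2, end=0/7000000} plus a
 * final open-ended entry for the target timeline (end = invalid). */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t XLogRecPtr;
#define InvalidXLogRecPtr ((XLogRecPtr) 0)

typedef struct
{
    uint32_t    tli;
    XLogRecPtr  end;    /* InvalidXLogRecPtr = timeline is open-ended */
} HistoryEntry;

static int
backup_satisfies_timeline(const HistoryEntry *history, int n,
                          uint32_t backup_tli, XLogRecPtr backup_stop_lsn)
{
    int i;

    for (i = 0; i < n; i++)
    {
        if (backup_tli == history[i].tli &&
            (history[i].end == InvalidXLogRecPtr ||
             backup_stop_lsn <= history[i].end))
            return 1;
    }
    return 0;
}

int
main(void)
{
    HistoryEntry history[] = {
        {1, 0x6000A58},           /* timeline 1 ends at 0/6000A58 */
        {2, 0x7000000},           /* timeline 2 ends at 0/7000000 */
        {3, InvalidXLogRecPtr},   /* target timeline, still open   */
    };

    printf("%d\n", backup_satisfies_timeline(history, 3, 1, 0x5000000)); /* 1 */
    printf("%d\n", backup_satisfies_timeline(history, 3, 1, 0x6100000)); /* 0 */
    return 0;
}
```

Note that the comparison in satisfy_timeline() is now inclusive, so a backup whose stop LSN sits exactly on a switchpoint still matches.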
@@ -809,112 +1699,333 @@ parseRecoveryTargetOptions(const char *target_time, pgRecoveryTarget *rt = pgut_new(pgRecoveryTarget); /* fill all options with default values */ - rt->time_specified = false; - rt->xid_specified = false; - rt->inclusive_specified = false; - rt->lsn_specified = false; - rt->recovery_target_time = 0; - rt->recovery_target_xid = 0; - rt->recovery_target_lsn = InvalidXLogRecPtr; - rt->target_time_string = NULL; - rt->target_xid_string = NULL; - rt->target_lsn_string = NULL; - rt->recovery_target_inclusive = false; - rt->recovery_target_tli = 0; - rt->recovery_target_immediate = false; - rt->recovery_target_name = NULL; - rt->recovery_target_action = NULL; - rt->restore_no_validate = false; + MemSet(rt, 0, sizeof(pgRecoveryTarget)); /* parse given options */ if (target_time) { + time_t dummy_time; + recovery_target_specified++; - rt->time_specified = true; - rt->target_time_string = target_time; + rt->time_string = target_time; if (parse_time(target_time, &dummy_time, false)) - rt->recovery_target_time = dummy_time; + rt->target_time = dummy_time; else - elog(ERROR, "Invalid value of --time option %s", target_time); + elog(ERROR, "Invalid value for '--recovery-target-time' option '%s'", + target_time); } if (target_xid) { + TransactionId dummy_xid; + recovery_target_specified++; - rt->xid_specified = true; - rt->target_xid_string = target_xid; + rt->xid_string = target_xid; #ifdef PGPRO_EE if (parse_uint64(target_xid, &dummy_xid, 0)) #else if (parse_uint32(target_xid, &dummy_xid, 0)) #endif - rt->recovery_target_xid = dummy_xid; + rt->target_xid = dummy_xid; else - elog(ERROR, "Invalid value of --xid option %s", target_xid); + elog(ERROR, "Invalid value for '--recovery-target-xid' option '%s'", + target_xid); } if (target_lsn) { + XLogRecPtr dummy_lsn; + recovery_target_specified++; - rt->lsn_specified = true; - rt->target_lsn_string = target_lsn; + rt->lsn_string = target_lsn; if (parse_lsn(target_lsn, &dummy_lsn)) - rt->recovery_target_lsn = dummy_lsn; + rt->target_lsn = dummy_lsn; else - elog(ERROR, "Invalid value of --lsn option %s", target_lsn); + elog(ERROR, "Invalid value of '--recovery-target-lsn' option '%s'", + target_lsn); } if (target_inclusive) { rt->inclusive_specified = true; if (parse_bool(target_inclusive, &dummy_bool)) - rt->recovery_target_inclusive = dummy_bool; + rt->target_inclusive = dummy_bool; else - elog(ERROR, "Invalid value of --inclusive option %s", target_inclusive); + elog(ERROR, "Invalid value for '--recovery-target-inclusive' option '%s'", + target_inclusive); } - rt->recovery_target_tli = target_tli; - if (target_immediate) + rt->target_tli = target_tli; + if (target_stop) { - recovery_target_specified++; - rt->recovery_target_immediate = target_immediate; - } + if ((strcmp(target_stop, "immediate") != 0) + && (strcmp(target_stop, "latest") != 0)) + elog(ERROR, "Invalid value for '--recovery-target' option '%s'", + target_stop); - if (restore_no_validate) - { - rt->restore_no_validate = restore_no_validate; + recovery_target_specified++; + rt->target_stop = target_stop; } if (target_name) { recovery_target_specified++; - rt->recovery_target_name = target_name; + rt->target_name = target_name; } if (target_action) { - rt->recovery_target_action = target_action; - if ((strcmp(target_action, "pause") != 0) && (strcmp(target_action, "promote") != 0) && (strcmp(target_action, "shutdown") != 0)) - elog(ERROR, "Invalid value of --recovery-target-action option %s", target_action); - } - else - { - /* Default recovery target action is pause */ 
- rt->recovery_target_action = "pause"; + elog(ERROR, "Invalid value for '--recovery-target-action' option '%s'", + target_action); + + rt->target_action = target_action; } /* More than one mutually exclusive option was defined. */ if (recovery_target_specified > 1) - elog(ERROR, "At most one of --immediate, --target-name, --time, --xid, or --lsn can be used"); + elog(ERROR, "At most one of '--recovery-target', '--recovery-target-name', " + "'--recovery-target-time', '--recovery-target-xid' or " + "'--recovery-target-lsn' options can be specified"); - /* If none of the options is defined, '--inclusive' option is meaningless */ - if (!(rt->xid_specified || rt->time_specified || rt->lsn_specified) && rt->recovery_target_inclusive) - elog(ERROR, "--inclusive option applies when either --time or --xid is specified"); + /* + * If none of the options is defined, '--recovery-target-inclusive' option + * is meaningless. + */ + if (!(rt->xid_string || rt->time_string || rt->lsn_string) && + rt->target_inclusive) + elog(ERROR, "The '--recovery-target-inclusive' option can be applied only when " + "either of '--recovery-target-time', '--recovery-target-xid' or " + "'--recovery-target-lsn' options is specified"); + + /* If none of the options is defined, '--recovery-target-action' is meaningless */ + if (rt->target_action && recovery_target_specified == 0) + elog(ERROR, "The '--recovery-target-action' option can be applied only when " + "either of '--recovery-target', '--recovery-target-time', '--recovery-target-xid', " + "'--recovery-target-lsn' or '--recovery-target-name' options is specified"); + + /* TODO: sanity for recovery-target-timeline */ return rt; } + +/* + * Return array of dbOids of databases that should not be restored + * Regardless of what option user used, db-include or db-exclude, + * we always convert it into exclude_list. + */ +parray * +get_dbOid_exclude_list(pgBackup *backup, parray *datname_list, + PartialRestoreType partial_restore_type) +{ + int i; + int j; +// pg_crc32 crc; + parray *database_map = NULL; + parray *dbOid_exclude_list = NULL; + pgFile *database_map_file = NULL; + char path[MAXPGPATH]; + char database_map_path[MAXPGPATH]; + parray *files = NULL; + + files = get_backup_filelist(backup, true); + + /* look for 'database_map' file in backup_content.control */ + for (i = 0; i < parray_num(files); i++) + { + pgFile *file = (pgFile *) parray_get(files, i); + + if ((file->external_dir_num == 0) && + strcmp(DATABASE_MAP, file->name) == 0) + { + database_map_file = file; + break; + } + } + + if (!database_map_file) + elog(ERROR, "Backup %s doesn't contain a database_map, partial restore is impossible.", + base36enc(backup->start_time)); + + join_path_components(path, backup->root_dir, DATABASE_DIR); + join_path_components(database_map_path, path, DATABASE_MAP); + + /* check database_map CRC */ +// crc = pgFileGetCRC(database_map_path, true, true, NULL, FIO_LOCAL_HOST); +// +// if (crc != database_map_file->crc) +// elog(ERROR, "Invalid CRC of backup file \"%s\" : %X. Expected %X", +// database_map_file->path, crc, database_map_file->crc); + + /* get database_map from file */ + database_map = read_database_map(backup); + + /* partial restore requested but database_map is missing */ + if (!database_map) + elog(ERROR, "Backup %s has empty or mangled database_map, partial restore is impossible.", + base36enc(backup->start_time)); + + /* + * So we have a list of datnames and a database_map for it. + * We must construct a list of dbOids to exclude. 
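A small worked example of the conversion just described, with an illustrative database_map and hypothetical OIDs, as driven by the db-include/db-exclude partial restore options:

```
database_map:        postgres -> 13593, db1 -> 16384, db2 -> 16385

--db-include db1     dbOid_exclude_list = {13593, 16385}   (everything not named)
--db-exclude db1     dbOid_exclude_list = {16384}          (only what was named)
```

Either way the restore path only ever consumes an exclude list, which keeps the per-file decision in restore_files() down to a single binary search.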
+ */ + if (partial_restore_type == INCLUDE) + { + /* For 'include', keep dbOid of every datname NOT specified by user */ + for (i = 0; i < parray_num(datname_list); i++) + { + bool found_match = false; + char *datname = (char *) parray_get(datname_list, i); + + for (j = 0; j < parray_num(database_map); j++) + { + db_map_entry *db_entry = (db_map_entry *) parray_get(database_map, j); + + /* got a match */ + if (strcmp(db_entry->datname, datname) == 0) + { + found_match = true; + /* for db-include we must exclude db_entry from database_map */ + parray_remove(database_map, j); + j--; + } + } + /* If specified datname is not found in database_map, error out */ + if (!found_match) + elog(ERROR, "Failed to find a database '%s' in database_map of backup %s", + datname, base36enc(backup->start_time)); + } + + /* At this moment only databases to exclude are left in the map */ + for (j = 0; j < parray_num(database_map); j++) + { + db_map_entry *db_entry = (db_map_entry *) parray_get(database_map, j); + + if (!dbOid_exclude_list) + dbOid_exclude_list = parray_new(); + parray_append(dbOid_exclude_list, &db_entry->dbOid); + } + } + else if (partial_restore_type == EXCLUDE) + { + /* For exclude, job is easier - find dbOid for every specified datname */ + for (i = 0; i < parray_num(datname_list); i++) + { + bool found_match = false; + char *datname = (char *) parray_get(datname_list, i); + + for (j = 0; j < parray_num(database_map); j++) + { + db_map_entry *db_entry = (db_map_entry *) parray_get(database_map, j); + + /* got a match */ + if (strcmp(db_entry->datname, datname) == 0) + { + found_match = true; + /* for db-exclude we must add dbOid to exclude list */ + if (!dbOid_exclude_list) + dbOid_exclude_list = parray_new(); + parray_append(dbOid_exclude_list, &db_entry->dbOid); + } + } + /* If specified datname is not found in database_map, error out */ + if (!found_match) + elog(ERROR, "Failed to find a database '%s' in database_map of backup %s", + datname, base36enc(backup->start_time)); + } + } + + /* extra sanity: ensure that list is not empty */ + if (!dbOid_exclude_list || parray_num(dbOid_exclude_list) < 1) + elog(ERROR, "Failed to find a match in database_map of backup %s for partial restore", + base36enc(backup->start_time)); + + /* clean backup filelist */ + if (files) + { + parray_walk(files, pgFileFree); + parray_free(files); + } + + /* sort dbOid array in ASC order */ + parray_qsort(dbOid_exclude_list, pgCompareOid); + + return dbOid_exclude_list; +} + +/* Check that instance is suitable for incremental restore + * Depending on type of incremental restore requirements are differs. + */ +void +check_incremental_compatibility(const char *pgdata, uint64 system_identifier, + IncrRestoreMode incremental_mode) +{ + uint64 system_id_pgdata; + bool success = true; + pid_t pid; + char backup_label[MAXPGPATH]; + + /* slurp pg_control and check that system ID is the same */ + /* check that instance is not running */ + /* if lsn_based, check that there is no backup_label files is around AND + * get redo point lsn from destination pg_control. + + * It is really important to be sure that pg_control is in cohesion with + * data files content, because based on pg_control information we will + * choose a backup suitable for lsn based incremental restore. 
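For reference, the system identifier comparison boils down to reading the control file of the destination cluster. A rough standalone sketch, assuming only that ControlFileData begins with the 64-bit system identifier; get_system_identifier() does the real work, including sanity checks this sketch skips, and the PGDATA path in main() is illustrative:

```
#include <stdio.h>
#include <stdint.h>

/* Read the 64-bit system identifier stored at the start of pg_control. */
static int
read_system_identifier(const char *pgdata, uint64_t *sysid)
{
    char    path[4096];
    FILE   *fp;

    snprintf(path, sizeof(path), "%s/global/pg_control", pgdata);

    fp = fopen(path, "rb");
    if (fp == NULL)
        return -1;

    /* system_identifier is the first field of ControlFileData */
    if (fread(sysid, sizeof(uint64_t), 1, fp) != 1)
    {
        fclose(fp);
        return -1;
    }

    fclose(fp);
    return 0;
}

int
main(void)
{
    uint64_t sysid;

    /* "/var/lib/postgresql/12/main" is an illustrative PGDATA path */
    if (read_system_identifier("/var/lib/postgresql/12/main", &sysid) == 0)
        printf("system identifier: %llu\n", (unsigned long long) sysid);
    else
        printf("could not read pg_control\n");
    return 0;
}
```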
+ */ + + system_id_pgdata = get_system_identifier(pgdata); + + if (system_id_pgdata != instance_config.system_identifier) + { + elog(WARNING, "Backup catalog was initialized for system id %lu, " + "but destination directory system id is %lu", + system_identifier, system_id_pgdata); + success = false; + } + + /* check postmaster pid */ + pid = fio_check_postmaster(pgdata, FIO_DB_HOST); + + if (pid == 1) /* postmaster.pid is mangled */ + { + char pid_file[MAXPGPATH]; + + snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pgdata); + elog(WARNING, "Pid file \"%s\" is mangled, cannot determine whether postmaster is running or not", + pid_file); + success = false; + } + else if (pid > 1) /* postmaster is up */ + { + elog(WARNING, "Postmaster with pid %u is running in destination directory \"%s\"", + pid, pgdata); + success = false; + } + + /* + * TODO: maybe there should be some other signs, pointing to pg_control + * desynchronization with cluster state. + */ + if (incremental_mode == INCR_LSN) + { + snprintf(backup_label, MAXPGPATH, "%s/backup_label", pgdata); + if (fio_access(backup_label, F_OK, FIO_DB_HOST) == 0) + { + elog(WARNING, "Destination directory contains \"backup_control\" file. " + "This does NOT mean that you should delete this file and retry, only that " + "incremental restore in 'lsn' mode may produce incorrect result, when applied " + "to cluster with pg_control not synchronized with cluster state." + "Consider to use incremental restore in 'checksum' mode"); + success = false; + } + } + + if (!success) + elog(ERROR, "Incremental restore is impossible"); +} diff --git a/src/show.c b/src/show.c index f240ce933..81a16ad64 100644 --- a/src/show.c +++ b/src/show.c @@ -3,7 +3,7 @@ * show.c: show backup information. * * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2018, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ @@ -11,200 +11,244 @@ #include "pg_probackup.h" #include -#include #include #include -#include "pqexpbuffer.h" - #include "utils/json.h" +#define half_rounded(x) (((x) + ((x) < 0 ? 
0 : 1)) / 2) + +/* struct to align fields printed in plain format */ +typedef struct ShowBackendRow +{ + const char *instance; + const char *version; + char backup_id[20]; + char recovery_time[100]; + const char *mode; + const char *wal_mode; + char tli[20]; + char duration[20]; + char data_bytes[20]; + char wal_bytes[20]; + char zratio[20]; + char start_lsn[20]; + char stop_lsn[20]; + const char *status; +} ShowBackendRow; + +/* struct to align fields printed in plain format */ +typedef struct ShowArchiveRow +{ + char tli[20]; + char parent_tli[20]; + char switchpoint[20]; + char min_segno[MAXFNAMELEN]; + char max_segno[MAXFNAMELEN]; + char n_segments[20]; + char size[20]; + char zratio[20]; + const char *status; + char n_backups[20]; +} ShowArchiveRow; static void show_instance_start(void); static void show_instance_end(void); -static void show_instance(time_t requested_backup_id, bool show_name); -static int show_backup(time_t requested_backup_id); +static void show_instance(const char *instance_name, time_t requested_backup_id, bool show_name); +static void print_backup_json_object(PQExpBuffer buf, pgBackup *backup); +static int show_backup(const char *instance_name, time_t requested_backup_id); + +static void show_instance_plain(const char *instance_name, parray *backup_list, bool show_name); +static void show_instance_json(const char *instance_name, parray *backup_list); -static void show_instance_plain(parray *backup_list, bool show_name); -static void show_instance_json(parray *backup_list); +static void show_instance_archive(InstanceConfig *instance); +static void show_archive_plain(const char *instance_name, uint32 xlog_seg_size, + parray *timelines_list, bool show_name); +static void show_archive_json(const char *instance_name, uint32 xlog_seg_size, + parray *tli_list); static PQExpBufferData show_buf; static bool first_instance = true; static int32 json_level = 0; +/* + * Entry point of pg_probackup SHOW subcommand. 
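Each ShowBackendRow above corresponds to one line of the plain-format table. An illustrative rendering is shown below; the values and spacing are made up, the real column widths are computed per column as shown further down, and the actual output brackets the table with '=' ruler lines sized to the total width.

```
 Instance  Version  ID      Recovery Time            Mode  WAL Mode  TLI  Time  Data    WAL   Zratio  Start LSN   Stop LSN    Status
 node      10.7     PYSUE8  2019-10-03 15:51:48+03   FULL  ARCHIVE   1/0  16s   9047kB  16MB  4.31    0/12000028  0/12000160  OK
```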
+ */ int -do_show(time_t requested_backup_id) +do_show(const char *instance_name, time_t requested_backup_id, bool show_archive) { + int i; + if (instance_name == NULL && requested_backup_id != INVALID_BACKUP_ID) - elog(ERROR, "You must specify --instance to use --backup_id option"); + elog(ERROR, "You must specify --instance to use (-i, --backup-id) option"); + if (show_archive && + requested_backup_id != INVALID_BACKUP_ID) + elog(ERROR, "You cannot specify --archive and (-i, --backup-id) options together"); + + /* + * if instance_name is not specified, + * show information about all instances in this backup catalog + */ if (instance_name == NULL) { - /* Show list of instances */ - char path[MAXPGPATH]; - DIR *dir; - struct dirent *dent; - - /* open directory and list contents */ - join_path_components(path, backup_path, BACKUPS_DIR); - dir = opendir(path); - if (dir == NULL) - elog(ERROR, "Cannot open directory \"%s\": %s", - path, strerror(errno)); + parray *instances = catalog_get_instance_list(); show_instance_start(); - - while (errno = 0, (dent = readdir(dir)) != NULL) + for (i = 0; i < parray_num(instances); i++) { - char child[MAXPGPATH]; - struct stat st; - - /* skip entries point current dir or parent dir */ - if (strcmp(dent->d_name, ".") == 0 || - strcmp(dent->d_name, "..") == 0) - continue; - - join_path_components(child, path, dent->d_name); - - if (lstat(child, &st) == -1) - elog(ERROR, "Cannot stat file \"%s\": %s", - child, strerror(errno)); - - if (!S_ISDIR(st.st_mode)) - continue; + InstanceConfig *instance = parray_get(instances, i); + char backup_instance_path[MAXPGPATH]; - instance_name = dent->d_name; - sprintf(backup_instance_path, "%s/%s/%s", backup_path, BACKUPS_DIR, instance_name); + sprintf(backup_instance_path, "%s/%s/%s", backup_path, BACKUPS_DIR, instance->name); - show_instance(INVALID_BACKUP_ID, true); + if (show_archive) + show_instance_archive(instance); + else + show_instance(instance->name, INVALID_BACKUP_ID, true); } - - if (errno) - elog(ERROR, "Cannot read directory \"%s\": %s", - path, strerror(errno)); - - if (closedir(dir)) - elog(ERROR, "Cannot close directory \"%s\": %s", - path, strerror(errno)); - show_instance_end(); return 0; } - else if (requested_backup_id == INVALID_BACKUP_ID || - show_format == SHOW_JSON) + /* always use */ + else if (show_format == SHOW_JSON || + requested_backup_id == INVALID_BACKUP_ID) { show_instance_start(); - show_instance(requested_backup_id, false); + + if (show_archive) + { + InstanceConfig *instance = readInstanceConfigFile(instance_name); + show_instance_archive(instance); + } + else + show_instance(instance_name, requested_backup_id, false); + show_instance_end(); return 0; } else - return show_backup(requested_backup_id); + { + if (show_archive) + { + InstanceConfig *instance = readInstanceConfigFile(instance_name); + show_instance_archive(instance); + } + else + show_backup(instance_name, requested_backup_id); + + return 0; + } } -static void +void pretty_size(int64 size, char *buf, size_t len) { - int exp = 0; + int64 limit = 10 * 1024; + int64 limit2 = limit * 2 - 1; /* minus means the size is invalid */ - if (size < 0) - { - strncpy(buf, "----", len); - return; - } +// if (size < 0) +// { +// strncpy(buf, "----", len); +// return; +// } - /* determine postfix */ - while (size > 9999) + if (size <= 0) { - ++exp; - size /= 1000; + strncpy(buf, "0", len); + return; } - switch (exp) + if (Abs(size) < limit) + snprintf(buf, len, "%dB", (int) size); + else { - case 0: - snprintf(buf, len, "%dB", (int) size); 
- break; - case 1: - snprintf(buf, len, "%dkB", (int) size); - break; - case 2: - snprintf(buf, len, "%dMB", (int) size); - break; - case 3: - snprintf(buf, len, "%dGB", (int) size); - break; - case 4: - snprintf(buf, len, "%dTB", (int) size); - break; - case 5: - snprintf(buf, len, "%dPB", (int) size); - break; - default: - strncpy(buf, "***", len); - break; + size >>= 9; + if (Abs(size) < limit2) + snprintf(buf, len, "%dkB", (int) half_rounded(size)); + else + { + size >>= 10; + if (Abs(size) < limit2) + snprintf(buf, len, "%dMB", (int) half_rounded(size)); + else + { + size >>= 10; + if (Abs(size) < limit2) + snprintf(buf, len, "%dGB", (int) half_rounded(size)); + else + { + size >>= 10; + snprintf(buf, len, "%dTB", (int) half_rounded(size)); + } + } + } } } -static TimeLineID -get_parent_tli(TimeLineID child_tli) +void +pretty_time_interval(double time, char *buf, size_t len) { - TimeLineID result = 0; - char path[MAXPGPATH]; - char fline[MAXPGPATH]; - FILE *fd; + int num_seconds = 0; + int milliseconds = 0; + int seconds = 0; + int minutes = 0; + int hours = 0; + int days = 0; - /* Timeline 1 does not have a history file and parent timeline */ - if (child_tli == 1) - return 0; + num_seconds = (int) time; - /* Search history file in archives */ - snprintf(path, lengthof(path), "%s/%08X.history", arclog_path, - child_tli); - fd = fopen(path, "rt"); - if (fd == NULL) + if (time <= 0) { - if (errno != ENOENT) - elog(ERROR, "could not open file \"%s\": %s", path, - strerror(errno)); + strncpy(buf, "0", len); + return; + } - /* Did not find history file, do not raise the error */ - return 0; + days = num_seconds / (24 * 3600); + num_seconds %= (24 * 3600); + + hours = num_seconds / 3600; + num_seconds %= 3600; + + minutes = num_seconds / 60; + num_seconds %= 60; + + seconds = num_seconds; + milliseconds = (int)((time - (int) time) * 1000.0); + + if (days > 0) + { + snprintf(buf, len, "%dd:%dh", days, hours); + return; } - /* - * Parse the file... - */ - while (fgets(fline, sizeof(fline), fd) != NULL) + if (hours > 0) { - /* skip leading whitespace and check for # comment */ - char *ptr; - char *endptr; + snprintf(buf, len, "%dh:%dm", hours, minutes); + return; + } - for (ptr = fline; *ptr; ptr++) - { - if (!IsSpace(*ptr)) - break; - } - if (*ptr == '\0' || *ptr == '#') - continue; - - /* expect a numeric timeline ID as first field of line */ - result = (TimeLineID) strtoul(ptr, &endptr, 0); - if (endptr == ptr) - elog(ERROR, - "syntax error(timeline ID) in history file: %s", - fline); + if (minutes > 0) + { + snprintf(buf, len, "%dm:%ds", minutes, seconds); + return; } - fclose(fd); + if (seconds > 0) + { + if (milliseconds > 0) + snprintf(buf, len, "%ds:%dms", seconds, milliseconds); + else + snprintf(buf, len, "%ds", seconds); + return; + } - /* TLI of the last line is parent TLI */ - return result; + snprintf(buf, len, "%dms", milliseconds); + return; } /* @@ -242,16 +286,16 @@ show_instance_end(void) * Show brief meta information about all backups in the backup instance. 
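The reworked pretty_size() mirrors the rounding scheme of PostgreSQL's pg_size_pretty(): shifting by 9 first (half-kB units) leaves one extra bit so that half_rounded() rounds to the nearest unit instead of truncating, and each further step shifts by 10. A standalone demo of the same scheme with a few sample values; the TB branch is omitted for brevity.

```
#include <stdio.h>
#include <stdint.h>

#define half_rounded(x) (((x) + ((x) < 0 ? 0 : 1)) / 2)

/* Illustrative copy of the rounding logic, not the pg_probackup function. */
static void
pretty_size_demo(int64_t size, char *buf, size_t len)
{
    int64_t limit = 10 * 1024;
    int64_t limit2 = limit * 2 - 1;

    if (size <= 0)
    {
        snprintf(buf, len, "0");
        return;
    }

    if (size < limit)
        snprintf(buf, len, "%dB", (int) size);
    else
    {
        size >>= 9;                              /* now in half-kB units */
        if (size < limit2)
            snprintf(buf, len, "%dkB", (int) half_rounded(size));
        else
        {
            size >>= 10;                         /* half-MB units */
            if (size < limit2)
                snprintf(buf, len, "%dMB", (int) half_rounded(size));
            else
                snprintf(buf, len, "%dGB", (int) half_rounded(size >> 10));
        }
    }
}

int
main(void)
{
    const int64_t samples[] = {512, 20000, 52428800, 21474836480LL};
    char    buf[32];
    int     i;

    /* prints: 512B, 20kB, 50MB, 20GB */
    for (i = 0; i < 4; i++)
    {
        pretty_size_demo(samples[i], buf, sizeof(buf));
        printf("%lld -> %s\n", (long long) samples[i], buf);
    }
    return 0;
}
```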
*/ static void -show_instance(time_t requested_backup_id, bool show_name) +show_instance(const char *instance_name, time_t requested_backup_id, bool show_name) { parray *backup_list; - backup_list = catalog_get_backup_list(requested_backup_id); + backup_list = catalog_get_backup_list(instance_name, requested_backup_id); if (show_format == SHOW_PLAIN) - show_instance_plain(backup_list, show_name); + show_instance_plain(instance_name, backup_list, show_name); else if (show_format == SHOW_JSON) - show_instance_json(backup_list); + show_instance_json(instance_name, backup_list); else elog(ERROR, "Invalid show format %d", (int) show_format); @@ -260,17 +304,173 @@ show_instance(time_t requested_backup_id, bool show_name) parray_free(backup_list); } +/* helper routine to print backup info as json object */ +static void +print_backup_json_object(PQExpBuffer buf, pgBackup *backup) +{ + TimeLineID parent_tli = 0; + char timestamp[100] = "----"; + char lsn[20]; + + json_add(buf, JT_BEGIN_OBJECT, &json_level); + + json_add_value(buf, "id", base36enc(backup->start_time), json_level, + true); + + if (backup->parent_backup != 0) + json_add_value(buf, "parent-backup-id", + base36enc(backup->parent_backup), json_level, true); + + json_add_value(buf, "backup-mode", pgBackupGetBackupMode(backup), + json_level, true); + + json_add_value(buf, "wal", backup->stream ? "STREAM": "ARCHIVE", + json_level, true); + + json_add_value(buf, "compress-alg", + deparse_compress_alg(backup->compress_alg), json_level, + true); + + json_add_key(buf, "compress-level", json_level); + appendPQExpBuffer(buf, "%d", backup->compress_level); + + json_add_value(buf, "from-replica", + backup->from_replica ? "true" : "false", json_level, + true); + + json_add_key(buf, "block-size", json_level); + appendPQExpBuffer(buf, "%u", backup->block_size); + + json_add_key(buf, "xlog-block-size", json_level); + appendPQExpBuffer(buf, "%u", backup->wal_block_size); + + json_add_key(buf, "checksum-version", json_level); + appendPQExpBuffer(buf, "%u", backup->checksum_version); + + json_add_value(buf, "program-version", backup->program_version, + json_level, true); + json_add_value(buf, "server-version", backup->server_version, + json_level, true); + + json_add_key(buf, "current-tli", json_level); + appendPQExpBuffer(buf, "%d", backup->tli); + + json_add_key(buf, "parent-tli", json_level); + + /* Only incremental backup can have Parent TLI */ + if (backup->parent_backup_link) + parent_tli = backup->parent_backup_link->tli; + + appendPQExpBuffer(buf, "%u", parent_tli); + + snprintf(lsn, lengthof(lsn), "%X/%X", + (uint32) (backup->start_lsn >> 32), (uint32) backup->start_lsn); + json_add_value(buf, "start-lsn", lsn, json_level, true); + + snprintf(lsn, lengthof(lsn), "%X/%X", + (uint32) (backup->stop_lsn >> 32), (uint32) backup->stop_lsn); + json_add_value(buf, "stop-lsn", lsn, json_level, true); + + time2iso(timestamp, lengthof(timestamp), backup->start_time); + json_add_value(buf, "start-time", timestamp, json_level, true); + + if (backup->end_time) + { + time2iso(timestamp, lengthof(timestamp), backup->end_time); + json_add_value(buf, "end-time", timestamp, json_level, true); + } + + json_add_key(buf, "recovery-xid", json_level); + appendPQExpBuffer(buf, XID_FMT, backup->recovery_xid); + + if (backup->recovery_time > 0) + { + time2iso(timestamp, lengthof(timestamp), backup->recovery_time); + json_add_value(buf, "recovery-time", timestamp, json_level, true); + } + + if (backup->expire_time > 0) + { + time2iso(timestamp, lengthof(timestamp), 
backup->expire_time); + json_add_value(buf, "expire-time", timestamp, json_level, true); + } + + if (backup->data_bytes != BYTES_INVALID) + { + json_add_key(buf, "data-bytes", json_level); + appendPQExpBuffer(buf, INT64_FORMAT, backup->data_bytes); + } + + if (backup->wal_bytes != BYTES_INVALID) + { + json_add_key(buf, "wal-bytes", json_level); + appendPQExpBuffer(buf, INT64_FORMAT, backup->wal_bytes); + } + + if (backup->uncompressed_bytes >= 0) + { + json_add_key(buf, "uncompressed-bytes", json_level); + appendPQExpBuffer(buf, INT64_FORMAT, backup->uncompressed_bytes); + } + + if (backup->uncompressed_bytes >= 0) + { + json_add_key(buf, "pgdata-bytes", json_level); + appendPQExpBuffer(buf, INT64_FORMAT, backup->pgdata_bytes); + } + + if (backup->primary_conninfo) + json_add_value(buf, "primary_conninfo", backup->primary_conninfo, + json_level, true); + + if (backup->external_dir_str) + json_add_value(buf, "external-dirs", backup->external_dir_str, + json_level, true); + + json_add_value(buf, "status", status2str(backup->status), json_level, + true); + + if (backup->note) + json_add_value(buf, "note", backup->note, + json_level, true); + + if (backup->content_crc != 0) + { + json_add_key(buf, "content-crc", json_level); + appendPQExpBuffer(buf, "%u", backup->content_crc); + } + + json_add(buf, JT_END_OBJECT, &json_level); +} + /* * Show detailed meta information about specified backup. */ static int -show_backup(time_t requested_backup_id) +show_backup(const char *instance_name, time_t requested_backup_id) { - pgBackup *backup; + int i; + pgBackup *backup = NULL; + parray *backups; + + backups = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); + + /* Find requested backup */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *tmp_backup = (pgBackup *) parray_get(backups, i); + + /* found target */ + if (tmp_backup->start_time == requested_backup_id) + { + backup = tmp_backup; + break; + } + } - backup = read_backup(requested_backup_id); if (backup == NULL) { + // TODO for 3.0: we should ERROR out here. elog(INFO, "Requested backup \"%s\" is not found.", /* We do not need free base36enc's result, we exit anyway */ base36enc(requested_backup_id)); @@ -284,89 +484,252 @@ show_backup(time_t requested_backup_id) elog(ERROR, "Invalid show format %d", (int) show_format); /* cleanup */ - pgBackupFree(backup); + parray_walk(backups, pgBackupFree); + parray_free(backups); return 0; } -/* - * Plain output. - */ - /* * Show instance backups in plain format. */ static void -show_instance_plain(parray *backup_list, bool show_name) +show_instance_plain(const char *instance_name, parray *backup_list, bool show_name) { +#define SHOW_FIELDS_COUNT 14 int i; + const char *names[SHOW_FIELDS_COUNT] = + { "Instance", "Version", "ID", "Recovery Time", + "Mode", "WAL Mode", "TLI", "Time", "Data", "WAL", + "Zratio", "Start LSN", "Stop LSN", "Status" }; + const char *field_formats[SHOW_FIELDS_COUNT] = + { " %-*s ", " %-*s ", " %-*s ", " %-*s ", + " %-*s ", " %-*s ", " %-*s ", " %*s ", " %*s ", " %*s ", + " %*s ", " %-*s ", " %-*s ", " %-*s "}; + uint32 widths[SHOW_FIELDS_COUNT]; + uint32 widths_sum = 0; + ShowBackendRow *rows; + TimeLineID parent_tli = 0; + + for (i = 0; i < SHOW_FIELDS_COUNT; i++) + widths[i] = strlen(names[i]); + + rows = (ShowBackendRow *) palloc(parray_num(backup_list) * + sizeof(ShowBackendRow)); + + /* + * Fill row values and calculate maximum width of each field. 
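The table layout above replaces the old fixed-width header with a two-pass scheme: widths[] starts at the header lengths, grows to the longest cell while the rows are filled, and the format strings then supply the width at run time. A minimal self-contained illustration of that technique with made-up data:

```
/* Two-pass column alignment: first pass records the widest cell per
 * column, second pass prints every row with a runtime field width
 * ("%-*s"). Data is illustrative. */
#include <stdio.h>
#include <string.h>

#define NCOLS 3
#define NROWS 2

int
main(void)
{
    const char *names[NCOLS] = {"Instance", "ID", "Status"};
    const char *rows[NROWS][NCOLS] = {
        {"node", "QZRNW8", "OK"},
        {"master", "QBFGD1", "RUNNING"},
    };
    size_t  widths[NCOLS];
    int     r, c;

    /* first pass: header width, then grow to the longest value */
    for (c = 0; c < NCOLS; c++)
    {
        widths[c] = strlen(names[c]);
        for (r = 0; r < NROWS; r++)
            if (strlen(rows[r][c]) > widths[c])
                widths[c] = strlen(rows[r][c]);
    }

    /* second pass: print header and rows left-aligned to the widths */
    for (c = 0; c < NCOLS; c++)
        printf(" %-*s ", (int) widths[c], names[c]);
    printf("\n");

    for (r = 0; r < NROWS; r++)
    {
        for (c = 0; c < NCOLS; c++)
            printf(" %-*s ", (int) widths[c], rows[r][c]);
        printf("\n");
    }
    return 0;
}
```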
+ */ + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = parray_get(backup_list, i); + ShowBackendRow *row = &rows[i]; + int cur = 0; + float zratio = 1; + + /* Instance */ + row->instance = instance_name; + widths[cur] = Max(widths[cur], strlen(row->instance)); + cur++; + + /* Version */ + row->version = backup->server_version[0] ? + backup->server_version : "----"; + widths[cur] = Max(widths[cur], strlen(row->version)); + cur++; + + /* ID */ + snprintf(row->backup_id, lengthof(row->backup_id), "%s", + base36enc(backup->start_time)); + widths[cur] = Max(widths[cur], strlen(row->backup_id)); + cur++; + + /* Recovery Time */ + if (backup->recovery_time != (time_t) 0) + time2iso(row->recovery_time, lengthof(row->recovery_time), + backup->recovery_time); + else + StrNCpy(row->recovery_time, "----", sizeof(row->recovery_time)); + widths[cur] = Max(widths[cur], strlen(row->recovery_time)); + cur++; + + /* Mode */ + row->mode = pgBackupGetBackupMode(backup); + widths[cur] = Max(widths[cur], strlen(row->mode)); + cur++; + + /* WAL mode*/ + row->wal_mode = backup->stream ? "STREAM": "ARCHIVE"; + widths[cur] = Max(widths[cur], strlen(row->wal_mode)); + cur++; + + /* Current/Parent TLI */ + if (backup->parent_backup_link != NULL) + parent_tli = backup->parent_backup_link->tli; + + snprintf(row->tli, lengthof(row->tli), "%u/%u", + backup->tli, + backup->backup_mode == BACKUP_MODE_FULL ? 0 : parent_tli); + widths[cur] = Max(widths[cur], strlen(row->tli)); + cur++; + + /* Time */ + if (backup->status == BACKUP_STATUS_RUNNING) + pretty_time_interval(difftime(current_time, backup->start_time), + row->duration, lengthof(row->duration)); + else if (backup->merge_time != (time_t) 0) + pretty_time_interval(difftime(backup->end_time, backup->merge_time), + row->duration, lengthof(row->duration)); + else if (backup->end_time != (time_t) 0) + pretty_time_interval(difftime(backup->end_time, backup->start_time), + row->duration, lengthof(row->duration)); + else + StrNCpy(row->duration, "----", sizeof(row->duration)); + widths[cur] = Max(widths[cur], strlen(row->duration)); + cur++; + + /* Data */ + pretty_size(backup->data_bytes, row->data_bytes, + lengthof(row->data_bytes)); + widths[cur] = Max(widths[cur], strlen(row->data_bytes)); + cur++; + + /* WAL */ + pretty_size(backup->wal_bytes, row->wal_bytes, + lengthof(row->wal_bytes)); + widths[cur] = Max(widths[cur], strlen(row->wal_bytes)); + cur++; + + /* Zratio (compression ratio) */ + if (backup->uncompressed_bytes != BYTES_INVALID && + (backup->uncompressed_bytes > 0 && backup->data_bytes > 0)) + { + zratio = (float)backup->uncompressed_bytes / (backup->data_bytes); + snprintf(row->zratio, lengthof(row->zratio), "%.2f", zratio); + } + else + snprintf(row->zratio, lengthof(row->zratio), "%.2f", zratio); + + widths[cur] = Max(widths[cur], strlen(row->zratio)); + cur++; + + /* Start LSN */ + snprintf(row->start_lsn, lengthof(row->start_lsn), "%X/%X", + (uint32) (backup->start_lsn >> 32), + (uint32) backup->start_lsn); + widths[cur] = Max(widths[cur], strlen(row->start_lsn)); + cur++; + + /* Stop LSN */ + snprintf(row->stop_lsn, lengthof(row->stop_lsn), "%X/%X", + (uint32) (backup->stop_lsn >> 32), + (uint32) backup->stop_lsn); + widths[cur] = Max(widths[cur], strlen(row->stop_lsn)); + cur++; + + /* Status */ + row->status = status2str(backup->status); + widths[cur] = Max(widths[cur], strlen(row->status)); + } + + for (i = 0; i < SHOW_FIELDS_COUNT; i++) + widths_sum += widths[i] + 2 /* two space */; if (show_name) - 
printfPQExpBuffer(&show_buf, "\nBACKUP INSTANCE '%s'\n", instance_name); + appendPQExpBuffer(&show_buf, "\nBACKUP INSTANCE '%s'\n", instance_name); - /* if you add new fields here, fix the header */ - /* show header */ - appendPQExpBufferStr(&show_buf, - "============================================================================================================================================\n"); - appendPQExpBufferStr(&show_buf, - " Instance Version ID Recovery time Mode WAL Current/Parent TLI Time Data Start LSN Stop LSN Status \n"); - appendPQExpBufferStr(&show_buf, - "============================================================================================================================================\n"); + /* + * Print header. + */ + for (i = 0; i < widths_sum; i++) + appendPQExpBufferChar(&show_buf, '='); + appendPQExpBufferChar(&show_buf, '\n'); + + for (i = 0; i < SHOW_FIELDS_COUNT; i++) + { + appendPQExpBuffer(&show_buf, field_formats[i], widths[i], names[i]); + } + appendPQExpBufferChar(&show_buf, '\n'); + + for (i = 0; i < widths_sum; i++) + appendPQExpBufferChar(&show_buf, '='); + appendPQExpBufferChar(&show_buf, '\n'); + /* + * Print values. + */ for (i = 0; i < parray_num(backup_list); i++) { - pgBackup *backup = parray_get(backup_list, i); - TimeLineID parent_tli; - char timestamp[100] = "----"; - char duration[20] = "----"; - char data_bytes_str[10] = "----"; + ShowBackendRow *row = &rows[i]; + int cur = 0; - if (backup->recovery_time != (time_t) 0) - time2iso(timestamp, lengthof(timestamp), backup->recovery_time); - if (backup->end_time != (time_t) 0) - snprintf(duration, lengthof(duration), "%.*lfs", 0, - difftime(backup->end_time, backup->start_time)); - - /* - * Calculate Data field, in the case of full backup this shows the - * total amount of data. For an differential backup, this size is only - * the difference of data accumulated. - */ - pretty_size(backup->data_bytes, data_bytes_str, - lengthof(data_bytes_str)); - - /* Get parent timeline before printing */ - parent_tli = get_parent_tli(backup->tli); - - appendPQExpBuffer(&show_buf, - " %-11s %-8s %-6s %-22s %-6s %-7s %3d / %-3d %5s %6s %2X/%-8X %2X/%-8X %-8s\n", - instance_name, - (backup->server_version[0] ? backup->server_version : "----"), - base36enc(backup->start_time), - timestamp, - pgBackupGetBackupMode(backup), - backup->stream ? 
"STREAM": "ARCHIVE", - backup->tli, - parent_tli, - duration, - data_bytes_str, - (uint32) (backup->start_lsn >> 32), - (uint32) backup->start_lsn, - (uint32) (backup->stop_lsn >> 32), - (uint32) backup->stop_lsn, - status2str(backup->status)); + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->instance); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->version); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->backup_id); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->recovery_time); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->mode); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->wal_mode); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->tli); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->duration); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->data_bytes); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->wal_bytes); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->zratio); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->start_lsn); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->stop_lsn); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->status); + cur++; + + appendPQExpBufferChar(&show_buf, '\n'); } -} -/* - * Json output. - */ + pfree(rows); +} /* * Show instance backups in json format. */ static void -show_instance_json(parray *backup_list) +show_instance_json(const char *instance_name, parray *backup_list) { int i; PQExpBuffer buf = &show_buf; @@ -377,8 +740,8 @@ show_instance_json(parray *backup_list) /* Begin of instance object */ json_add(buf, JT_BEGIN_OBJECT, &json_level); - json_add_value(buf, "instance", instance_name, json_level, false); - json_add_key(buf, "backups", json_level, true); + json_add_value(buf, "instance", instance_name, json_level, true); + json_add_key(buf, "backups", json_level); /* * List backups. @@ -388,109 +751,368 @@ show_instance_json(parray *backup_list) for (i = 0; i < parray_num(backup_list); i++) { pgBackup *backup = parray_get(backup_list, i); - TimeLineID parent_tli; - char timestamp[100] = "----"; - char lsn[20]; if (i != 0) appendPQExpBufferChar(buf, ','); - json_add(buf, JT_BEGIN_OBJECT, &json_level); + print_backup_json_object(buf, backup); + } - json_add_value(buf, "id", base36enc(backup->start_time), json_level, - false); + /* End of backups */ + json_add(buf, JT_END_ARRAY, &json_level); - if (backup->parent_backup != 0) - json_add_value(buf, "parent-backup-id", - base36enc(backup->parent_backup), json_level, true); + /* End of instance object */ + json_add(buf, JT_END_OBJECT, &json_level); - json_add_value(buf, "backup-mode", pgBackupGetBackupMode(backup), - json_level, true); + first_instance = false; +} - json_add_value(buf, "wal", backup->stream ? 
"STREAM": "ARCHIVE", - json_level, true); +/* + * show information about WAL archive of the instance + */ +static void +show_instance_archive(InstanceConfig *instance) +{ + parray *timelineinfos; - json_add_value(buf, "compress-alg", - deparse_compress_alg(backup->compress_alg), json_level, - true); + timelineinfos = catalog_get_timelines(instance); - json_add_key(buf, "compress-level", json_level, true); - appendPQExpBuffer(buf, "%d", backup->compress_level); + if (show_format == SHOW_PLAIN) + show_archive_plain(instance->name, instance->xlog_seg_size, timelineinfos, true); + else if (show_format == SHOW_JSON) + show_archive_json(instance->name, instance->xlog_seg_size, timelineinfos); + else + elog(ERROR, "Invalid show format %d", (int) show_format); +} - json_add_value(buf, "from-replica", - backup->from_replica ? "true" : "false", json_level, - true); +static void +show_archive_plain(const char *instance_name, uint32 xlog_seg_size, + parray *tli_list, bool show_name) +{ + char segno_tmp[MAXFNAMELEN]; + parray *actual_tli_list = parray_new(); +#define SHOW_ARCHIVE_FIELDS_COUNT 10 + int i; + const char *names[SHOW_ARCHIVE_FIELDS_COUNT] = + { "TLI", "Parent TLI", "Switchpoint", + "Min Segno", "Max Segno", "N segments", "Size", "Zratio", "N backups", "Status"}; + const char *field_formats[SHOW_ARCHIVE_FIELDS_COUNT] = + { " %-*s ", " %-*s ", " %-*s ", " %-*s ", + " %-*s ", " %-*s ", " %-*s ", " %-*s ", " %-*s ", " %-*s "}; + uint32 widths[SHOW_ARCHIVE_FIELDS_COUNT]; + uint32 widths_sum = 0; + ShowArchiveRow *rows; + + for (i = 0; i < SHOW_ARCHIVE_FIELDS_COUNT; i++) + widths[i] = strlen(names[i]); + + /* Ignore empty timelines */ + for (i = 0; i < parray_num(tli_list); i++) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i); - json_add_key(buf, "block-size", json_level, true); - appendPQExpBuffer(buf, "%u", backup->block_size); + if (tlinfo->n_xlog_files > 0) + parray_append(actual_tli_list, tlinfo); + } - json_add_key(buf, "xlog-block-size", json_level, true); - appendPQExpBuffer(buf, "%u", backup->wal_block_size); + rows = (ShowArchiveRow *) palloc0(parray_num(actual_tli_list) * + sizeof(ShowArchiveRow)); - json_add_key(buf, "checksum-version", json_level, true); - appendPQExpBuffer(buf, "%u", backup->checksum_version); + /* + * Fill row values and calculate maximum width of each field. 
+ */ + for (i = 0; i < parray_num(actual_tli_list); i++) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(actual_tli_list, i); + ShowArchiveRow *row = &rows[i]; + int cur = 0; + float zratio = 0; + + /* TLI */ + snprintf(row->tli, lengthof(row->tli), "%u", + tlinfo->tli); + widths[cur] = Max(widths[cur], strlen(row->tli)); + cur++; + + /* Parent TLI */ + snprintf(row->parent_tli, lengthof(row->parent_tli), "%u", + tlinfo->parent_tli); + widths[cur] = Max(widths[cur], strlen(row->parent_tli)); + cur++; + + /* Switchpoint LSN */ + snprintf(row->switchpoint, lengthof(row->switchpoint), "%X/%X", + (uint32) (tlinfo->switchpoint >> 32), + (uint32) tlinfo->switchpoint); + widths[cur] = Max(widths[cur], strlen(row->switchpoint)); + cur++; + + /* Min Segno */ + GetXLogFileName(segno_tmp, tlinfo->tli, tlinfo->begin_segno, xlog_seg_size); + snprintf(row->min_segno, lengthof(row->min_segno), "%s",segno_tmp); + + widths[cur] = Max(widths[cur], strlen(row->min_segno)); + cur++; + + /* Max Segno */ + GetXLogFileName(segno_tmp, tlinfo->tli, tlinfo->end_segno, xlog_seg_size); + snprintf(row->max_segno, lengthof(row->max_segno), "%s", segno_tmp); + + widths[cur] = Max(widths[cur], strlen(row->max_segno)); + cur++; + + /* N files */ + snprintf(row->n_segments, lengthof(row->n_segments), "%lu", + tlinfo->n_xlog_files); + widths[cur] = Max(widths[cur], strlen(row->n_segments)); + cur++; + + /* Size */ + pretty_size(tlinfo->size, row->size, + lengthof(row->size)); + widths[cur] = Max(widths[cur], strlen(row->size)); + cur++; + + /* Zratio (compression ratio) */ + if (tlinfo->size != 0) + zratio = ((float)xlog_seg_size*tlinfo->n_xlog_files) / tlinfo->size; + + snprintf(row->zratio, lengthof(row->n_segments), "%.2f", zratio); + widths[cur] = Max(widths[cur], strlen(row->zratio)); + cur++; + + /* N backups */ + snprintf(row->n_backups, lengthof(row->n_backups), "%lu", + tlinfo->backups?parray_num(tlinfo->backups):0); + widths[cur] = Max(widths[cur], strlen(row->n_backups)); + cur++; + + /* Status */ + if (tlinfo->lost_segments == NULL) + row->status = "OK"; + else + row->status = "DEGRADED"; + widths[cur] = Max(widths[cur], strlen(row->status)); + cur++; + } - json_add_value(buf, "program-version", backup->program_version, - json_level, true); - json_add_value(buf, "server-version", backup->server_version, - json_level, true); + for (i = 0; i < SHOW_ARCHIVE_FIELDS_COUNT; i++) + widths_sum += widths[i] + 2 /* two space */; - json_add_key(buf, "current-tli", json_level, true); - appendPQExpBuffer(buf, "%d", backup->tli); + if (show_name) + appendPQExpBuffer(&show_buf, "\nARCHIVE INSTANCE '%s'\n", instance_name); - json_add_key(buf, "parent-tli", json_level, true); - parent_tli = get_parent_tli(backup->tli); - appendPQExpBuffer(buf, "%u", parent_tli); + /* + * Print header. 
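+ * A rule of '=' characters spanning the summed column widths is printed,
+ * then the left-aligned column titles, then a second rule. Each column is
+ * as wide as the longer of its title and its longest value, plus two spaces.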
+ */ + for (i = 0; i < widths_sum; i++) + appendPQExpBufferChar(&show_buf, '='); + appendPQExpBufferChar(&show_buf, '\n'); - snprintf(lsn, lengthof(lsn), "%X/%X", - (uint32) (backup->start_lsn >> 32), (uint32) backup->start_lsn); - json_add_value(buf, "start-lsn", lsn, json_level, true); + for (i = 0; i < SHOW_ARCHIVE_FIELDS_COUNT; i++) + { + appendPQExpBuffer(&show_buf, field_formats[i], widths[i], names[i]); + } + appendPQExpBufferChar(&show_buf, '\n'); - snprintf(lsn, lengthof(lsn), "%X/%X", - (uint32) (backup->stop_lsn >> 32), (uint32) backup->stop_lsn); - json_add_value(buf, "stop-lsn", lsn, json_level, true); + for (i = 0; i < widths_sum; i++) + appendPQExpBufferChar(&show_buf, '='); + appendPQExpBufferChar(&show_buf, '\n'); - time2iso(timestamp, lengthof(timestamp), backup->start_time); - json_add_value(buf, "start-time", timestamp, json_level, true); + /* + * Print values. + */ + for (i = parray_num(actual_tli_list) - 1; i >= 0; i--) + { + ShowArchiveRow *row = &rows[i]; + int cur = 0; - if (backup->end_time) - { - time2iso(timestamp, lengthof(timestamp), backup->end_time); - json_add_value(buf, "end-time", timestamp, json_level, true); - } + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->tli); + cur++; - json_add_key(buf, "recovery-xid", json_level, true); - appendPQExpBuffer(buf, XID_FMT, backup->recovery_xid); + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->parent_tli); + cur++; - if (backup->recovery_time > 0) - { - time2iso(timestamp, lengthof(timestamp), backup->recovery_time); - json_add_value(buf, "recovery-time", timestamp, json_level, true); - } + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->switchpoint); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->min_segno); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->max_segno); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->n_segments); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->size); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->zratio); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->n_backups); + cur++; + + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->status); + cur++; + appendPQExpBufferChar(&show_buf, '\n'); + } + + pfree(rows); + //TODO: free timelines +} + +static void +show_archive_json(const char *instance_name, uint32 xlog_seg_size, + parray *tli_list) +{ + int i,j; + PQExpBuffer buf = &show_buf; + parray *actual_tli_list = parray_new(); + char segno_tmp[MAXFNAMELEN]; + + if (!first_instance) + appendPQExpBufferChar(buf, ','); + + /* Begin of instance object */ + json_add(buf, JT_BEGIN_OBJECT, &json_level); + + json_add_value(buf, "instance", instance_name, json_level, true); + json_add_key(buf, "timelines", json_level); + + /* Ignore empty timelines */ + + for (i = 0; i < parray_num(tli_list); i++) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i); + + if (tlinfo->n_xlog_files > 0) + parray_append(actual_tli_list, tlinfo); + } + + /* + * List timelines. 
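+ * The loop below walks actual_tli_list from the end, so timelines are
+ * emitted in reverse list order, matching the plain-text output.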
+ */ + json_add(buf, JT_BEGIN_ARRAY, &json_level); + + for (i = parray_num(actual_tli_list) - 1; i >= 0; i--) + { + timelineInfo *tlinfo = (timelineInfo *) parray_get(actual_tli_list, i); + char tmp_buf[MAXFNAMELEN]; + float zratio = 0; + + if (i != (parray_num(actual_tli_list) - 1)) + appendPQExpBufferChar(buf, ','); + + json_add(buf, JT_BEGIN_OBJECT, &json_level); + + json_add_key(buf, "tli", json_level); + appendPQExpBuffer(buf, "%u", tlinfo->tli); + + json_add_key(buf, "parent-tli", json_level); + appendPQExpBuffer(buf, "%u", tlinfo->parent_tli); + + snprintf(tmp_buf, lengthof(tmp_buf), "%X/%X", + (uint32) (tlinfo->switchpoint >> 32), (uint32) tlinfo->switchpoint); + json_add_value(buf, "switchpoint", tmp_buf, json_level, true); + + GetXLogFileName(segno_tmp, tlinfo->tli, tlinfo->begin_segno, xlog_seg_size); + snprintf(tmp_buf, lengthof(tmp_buf), "%s", segno_tmp); + json_add_value(buf, "min-segno", tmp_buf, json_level, true); + + GetXLogFileName(segno_tmp, tlinfo->tli, tlinfo->end_segno, xlog_seg_size); + snprintf(tmp_buf, lengthof(tmp_buf), "%s", segno_tmp); + json_add_value(buf, "max-segno", tmp_buf, json_level, true); + + json_add_key(buf, "n-segments", json_level); + appendPQExpBuffer(buf, "%lu", tlinfo->n_xlog_files); - if (backup->data_bytes != BYTES_INVALID) + json_add_key(buf, "size", json_level); + appendPQExpBuffer(buf, "%lu", tlinfo->size); + + json_add_key(buf, "zratio", json_level); + if (tlinfo->size != 0) + zratio = ((float)xlog_seg_size*tlinfo->n_xlog_files) / tlinfo->size; + appendPQExpBuffer(buf, "%.2f", zratio); + + if (tlinfo->closest_backup != NULL) + snprintf(tmp_buf, lengthof(tmp_buf), "%s", + base36enc(tlinfo->closest_backup->start_time)); + else + snprintf(tmp_buf, lengthof(tmp_buf), "%s", ""); + + json_add_value(buf, "closest-backup-id", tmp_buf, json_level, true); + + if (tlinfo->lost_segments == NULL) + json_add_value(buf, "status", "OK", json_level, true); + else + json_add_value(buf, "status", "DEGRADED", json_level, true); + + json_add_key(buf, "lost-segments", json_level); + + if (tlinfo->lost_segments != NULL) { - json_add_key(buf, "data-bytes", json_level, true); - appendPQExpBuffer(buf, INT64_FORMAT, backup->data_bytes); + json_add(buf, JT_BEGIN_ARRAY, &json_level); + + for (j = 0; j < parray_num(tlinfo->lost_segments); j++) + { + xlogInterval *lost_segments = (xlogInterval *) parray_get(tlinfo->lost_segments, j); + + if (j != 0) + appendPQExpBufferChar(buf, ','); + + json_add(buf, JT_BEGIN_OBJECT, &json_level); + + GetXLogFileName(segno_tmp, tlinfo->tli, lost_segments->begin_segno, xlog_seg_size); + snprintf(tmp_buf, lengthof(tmp_buf), "%s", segno_tmp); + json_add_value(buf, "begin-segno", tmp_buf, json_level, true); + + GetXLogFileName(segno_tmp, tlinfo->tli, lost_segments->end_segno, xlog_seg_size); + snprintf(tmp_buf, lengthof(tmp_buf), "%s", segno_tmp); + json_add_value(buf, "end-segno", tmp_buf, json_level, true); + json_add(buf, JT_END_OBJECT, &json_level); + } + json_add(buf, JT_END_ARRAY, &json_level); } + else + appendPQExpBuffer(buf, "[]"); + + json_add_key(buf, "backups", json_level); - if (backup->wal_bytes != BYTES_INVALID) + if (tlinfo->backups != NULL) { - json_add_key(buf, "wal-bytes", json_level, true); - appendPQExpBuffer(buf, INT64_FORMAT, backup->wal_bytes); - } + json_add(buf, JT_BEGIN_ARRAY, &json_level); + for (j = 0; j < parray_num(tlinfo->backups); j++) + { + pgBackup *backup = parray_get(tlinfo->backups, j); + + if (j != 0) + appendPQExpBufferChar(buf, ','); - if (backup->primary_conninfo) - json_add_value(buf, 
"primary_conninfo", backup->primary_conninfo, - json_level, true); + print_backup_json_object(buf, backup); + } - json_add_value(buf, "status", status2str(backup->status), json_level, - true); + json_add(buf, JT_END_ARRAY, &json_level); + } + else + appendPQExpBuffer(buf, "[]"); + /* End of timeline */ json_add(buf, JT_END_OBJECT, &json_level); } - /* End of backups */ + /* End of timelines object */ json_add(buf, JT_END_ARRAY, &json_level); /* End of instance object */ diff --git a/src/status.c b/src/status.c deleted file mode 100644 index 155a07f40..000000000 --- a/src/status.c +++ /dev/null @@ -1,118 +0,0 @@ -/*------------------------------------------------------------------------- - * - * status.c - * - * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group - * - * Monitor status of a PostgreSQL server. - * - *------------------------------------------------------------------------- - */ - - -#include "postgres_fe.h" - -#include -#include -#include - -#include "pg_probackup.h" - -/* PID can be negative for standalone backend */ -typedef long pgpid_t; - -static pgpid_t get_pgpid(void); -static bool postmaster_is_alive(pid_t pid); - -/* - * get_pgpid - * - * Get PID of postmaster, by scanning postmaster.pid. - */ -static pgpid_t -get_pgpid(void) -{ - FILE *pidf; - long pid; - char pid_file[MAXPGPATH]; - - snprintf(pid_file, lengthof(pid_file), "%s/postmaster.pid", pgdata); - - pidf = fopen(pid_file, PG_BINARY_R); - if (pidf == NULL) - { - /* No pid file, not an error on startup */ - if (errno == ENOENT) - return 0; - else - { - elog(ERROR, "could not open PID file \"%s\": %s", - pid_file, strerror(errno)); - } - } - if (fscanf(pidf, "%ld", &pid) != 1) - { - /* Is the file empty? */ - if (ftell(pidf) == 0 && feof(pidf)) - elog(ERROR, "the PID file \"%s\" is empty", - pid_file); - else - elog(ERROR, "invalid data in PID file \"%s\"\n", - pid_file); - } - fclose(pidf); - return (pgpid_t) pid; -} - -/* - * postmaster_is_alive - * - * Check whether postmaster is alive or not. - */ -static bool -postmaster_is_alive(pid_t pid) -{ - /* - * Test to see if the process is still there. Note that we do not - * consider an EPERM failure to mean that the process is still there; - * EPERM must mean that the given PID belongs to some other userid, and - * considering the permissions on $PGDATA, that means it's not the - * postmaster we are after. - * - * Don't believe that our own PID or parent shell's PID is the postmaster, - * either. (Windows hasn't got getppid(), though.) - */ - if (pid == getpid()) - return false; -#ifndef WIN32 - if (pid == getppid()) - return false; -#endif - if (kill(pid, 0) == 0) - return true; - return false; -} - -/* - * is_pg_running - * - * - */ -bool -is_pg_running(void) -{ - pgpid_t pid; - - pid = get_pgpid(); - - /* 0 means no pid file */ - if (pid == 0) - return false; - - /* Case of a standalone backend */ - if (pid < 0) - pid = -pid; - - /* Check if postmaster is alive */ - return postmaster_is_alive((pid_t) pid); -} diff --git a/src/util.c b/src/util.c index cb8f9bf69..5ad751df2 100644 --- a/src/util.c +++ b/src/util.c @@ -3,16 +3,35 @@ * util.c: log messages to log file or stderr, and misc code. 
* * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ #include "pg_probackup.h" +#include "catalog/pg_control.h" + #include -#include "storage/bufpage.h" +#include + +#include + +static const char *statusName[] = +{ + "UNKNOWN", + "OK", + "ERROR", + "RUNNING", + "MERGING", + "MERGED", + "DELETING", + "DELETED", + "DONE", + "ORPHAN", + "CORRUPT" +}; const char * base36enc(long unsigned int value) @@ -71,7 +90,8 @@ checkControlFile(ControlFileData *ControlFile) "Either the file is corrupt, or it has a different layout than this program\n" "is expecting. The results below are untrustworthy."); - if (ControlFile->pg_control_version % 65536 == 0 && ControlFile->pg_control_version / 65536 != 0) + if ((ControlFile->pg_control_version % 65536 == 0 || ControlFile->pg_control_version % 65536 > 10000) && + ControlFile->pg_control_version / 65536 != 0) elog(ERROR, "possible byte ordering mismatch\n" "The byte ordering used to store the pg_control file might not match the one\n" "used by this program. In that case the results below would be incorrect, and\n" @@ -100,19 +120,85 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size) checkControlFile(ControlFile); } +/* + * Write ControlFile to pg_control + */ +static void +writeControlFile(ControlFileData *ControlFile, const char *path, fio_location location) +{ + int fd; + char *buffer = NULL; + +#if PG_VERSION_NUM >= 100000 + int ControlFileSize = PG_CONTROL_FILE_SIZE; +#else + int ControlFileSize = PG_CONTROL_SIZE; +#endif + + /* copy controlFileSize */ + buffer = pg_malloc(ControlFileSize); + memcpy(buffer, ControlFile, sizeof(ControlFileData)); + + /* Write pg_control */ + fd = fio_open(path, + O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, location); + + if (fd < 0) + elog(ERROR, "Failed to open file: %s", path); + + if (fio_write(fd, buffer, ControlFileSize) != ControlFileSize) + elog(ERROR, "Failed to overwrite file: %s", path); + + if (fio_flush(fd) != 0) + elog(ERROR, "Failed to sync file: %s", path); + + fio_close(fd); + pg_free(buffer); +} + /* * Utility shared by backup and restore to fetch the current timeline * used by a node. */ TimeLineID -get_current_timeline(bool safe) +get_current_timeline(PGconn *conn) +{ + + PGresult *res; + TimeLineID tli = 0; + char *val; + + res = pgut_execute_extended(conn, + "SELECT timeline_id FROM pg_control_checkpoint()", 0, NULL, true, true); + + if (PQresultStatus(res) == PGRES_TUPLES_OK) + val = PQgetvalue(res, 0, 0); + else + return get_current_timeline_from_control(false); + + if (!parse_uint32(val, &tli, 0)) + { + PQclear(res); + elog(WARNING, "Invalid value of timeline_id %s", val); + + /* TODO 3.0 remove it and just error out */ + return get_current_timeline_from_control(false); + } + + return tli; +} + +/* Get timeline from pg_control file */ +TimeLineID +get_current_timeline_from_control(bool safe) { ControlFileData ControlFile; char *buffer; size_t size; /* First fetch file... */ - buffer = slurpFile(pgdata, "global/pg_control", &size, safe); + buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, + safe, FIO_DB_HOST); if (safe && buffer == NULL) return 0; @@ -122,15 +208,55 @@ get_current_timeline(bool safe) return ControlFile.checkPointCopy.ThisTimeLineID; } +/* + * Get last check point record ptr from pg_tonrol. 
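+ * (pg_control, that is: when built against PostgreSQL 9.6 or newer the value
+ * is taken from pg_control_checkpoint(); otherwise the pg_control file is read.)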
+ */ +XLogRecPtr +get_checkpoint_location(PGconn *conn) +{ +#if PG_VERSION_NUM >= 90600 + PGresult *res; + uint32 lsn_hi; + uint32 lsn_lo; + XLogRecPtr lsn; + +#if PG_VERSION_NUM >= 100000 + res = pgut_execute(conn, + "SELECT checkpoint_lsn FROM pg_catalog.pg_control_checkpoint()", + 0, NULL); +#else + res = pgut_execute(conn, + "SELECT checkpoint_location FROM pg_catalog.pg_control_checkpoint()", + 0, NULL); +#endif + XLogDataFromLSN(PQgetvalue(res, 0, 0), &lsn_hi, &lsn_lo); + PQclear(res); + /* Calculate LSN */ + lsn = ((uint64) lsn_hi) << 32 | lsn_lo; + + return lsn; +#else + char *buffer; + size_t size; + ControlFileData ControlFile; + + buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + return ControlFile.checkPoint; +#endif +} + uint64 -get_system_identifier(char *pgdata_path) +get_system_identifier(const char *pgdata_path) { ControlFileData ControlFile; char *buffer; size_t size; /* First fetch file... */ - buffer = slurpFile(pgdata_path, "global/pg_control", &size, false); + buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); if (buffer == NULL) return 0; digestControlFile(&ControlFile, buffer, size); @@ -164,7 +290,7 @@ get_remote_system_identifier(PGconn *conn) size_t size; ControlFileData ControlFile; - buffer = fetchFile(conn, "global/pg_control", &size); + buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); digestControlFile(&ControlFile, buffer, size); pg_free(buffer); @@ -172,6 +298,25 @@ get_remote_system_identifier(PGconn *conn) #endif } +uint32 +get_xlog_seg_size(char *pgdata_path) +{ +#if PG_VERSION_NUM >= 110000 + ControlFileData ControlFile; + char *buffer; + size_t size; + + /* First fetch file... */ + buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + return ControlFile.xlog_seg_size; +#else + return (uint32) XLOG_SEG_SIZE; +#endif +} + uint32 get_data_checksum_version(bool safe) { @@ -180,7 +325,8 @@ get_data_checksum_version(bool safe) size_t size; /* First fetch file... */ - buffer = slurpFile(pgdata, "global/pg_control", &size, safe); + buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, + safe, FIO_DB_HOST); if (buffer == NULL) return 0; digestControlFile(&ControlFile, buffer, size); @@ -189,58 +335,131 @@ get_data_checksum_version(bool safe) return ControlFile.data_checksum_version; } +pg_crc32c +get_pgcontrol_checksum(const char *pgdata_path) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + + /* First fetch file... */ + buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_BACKUP_HOST); + + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + return ControlFile.crc; +} -/* - * Convert time_t value to ISO-8601 format string. Always set timezone offset. - */ void -time2iso(char *buf, size_t len, time_t time) +get_redo(const char *pgdata_path, RedoParams *redo) { - struct tm *ptm = gmtime(&time); - time_t gmt = mktime(ptm); - time_t offset; - char *ptr = buf; + ControlFileData ControlFile; + char *buffer; + size_t size; + + /* First fetch file... */ + buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); - ptm = localtime(&time); - offset = time - gmt + (ptm->tm_isdst ? 
3600 : 0); + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); - strftime(ptr, len, "%Y-%m-%d %H:%M:%S", ptm); + redo->lsn = ControlFile.checkPointCopy.redo; + redo->tli = ControlFile.checkPointCopy.ThisTimeLineID; - ptr += strlen(ptr); - snprintf(ptr, len - (ptr - buf), "%c%02d", - (offset >= 0) ? '+' : '-', - abs((int) offset) / SECS_PER_HOUR); + if (ControlFile.minRecoveryPoint > 0 && + ControlFile.minRecoveryPoint < redo->lsn) + { + redo->lsn = ControlFile.minRecoveryPoint; + redo->tli = ControlFile.minRecoveryPointTLI; + } - if (abs((int) offset) % SECS_PER_HOUR != 0) + if (ControlFile.backupStartPoint > 0 && + ControlFile.backupStartPoint < redo->lsn) { - ptr += strlen(ptr); - snprintf(ptr, len - (ptr - buf), ":%02d", - abs((int) offset % SECS_PER_HOUR) / SECS_PER_MINUTE); + redo->lsn = ControlFile.backupStartPoint; + redo->tli = ControlFile.checkPointCopy.ThisTimeLineID; } + + redo->checksum_version = ControlFile.data_checksum_version; } -/* copied from timestamp.c */ -pg_time_t -timestamptz_to_time_t(TimestampTz t) +/* + * Rewrite minRecoveryPoint of pg_control in backup directory. minRecoveryPoint + * 'as-is' is not to be trusted. + */ +void +set_min_recovery_point(pgFile *file, const char *backup_path, + XLogRecPtr stop_backup_lsn) { - pg_time_t result; + ControlFileData ControlFile; + char *buffer; + size_t size; + char fullpath[MAXPGPATH]; -#ifdef HAVE_INT64_TIMESTAMP - result = (pg_time_t) (t / USECS_PER_SEC + - ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY)); -#else - result = (pg_time_t) (t + - ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY)); -#endif - return result; + /* First fetch file content */ + buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + digestControlFile(&ControlFile, buffer, size); + + elog(LOG, "Current minRecPoint %X/%X", + (uint32) (ControlFile.minRecoveryPoint >> 32), + (uint32) ControlFile.minRecoveryPoint); + + elog(LOG, "Setting minRecPoint to %X/%X", + (uint32) (stop_backup_lsn >> 32), + (uint32) stop_backup_lsn); + + ControlFile.minRecoveryPoint = stop_backup_lsn; + + /* Update checksum in pg_control header */ + INIT_CRC32C(ControlFile.crc); + COMP_CRC32C(ControlFile.crc, (char *) &ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32C(ControlFile.crc); + + /* overwrite pg_control */ + snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE); + writeControlFile(&ControlFile, fullpath, FIO_LOCAL_HOST); + + /* Update pg_control checksum in backup_list */ + file->crc = ControlFile.crc; + + pg_free(buffer); } -/* Parse string representation of the server version */ -int -parse_server_version(char *server_version_str) +/* + * Copy pg_control file to backup. We do not apply compression to this file. + */ +void +copy_pgcontrol_file(const char *from_fullpath, fio_location from_location, + const char *to_fullpath, fio_location to_location, pgFile *file) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + + buffer = slurpFile(from_fullpath, "", &size, false, from_location); + + digestControlFile(&ControlFile, buffer, size); + + file->crc = ControlFile.crc; + file->read_size = size; + file->write_size = size; + file->uncompressed_size = size; + + writeControlFile(&ControlFile, to_fullpath, to_location); + + pg_free(buffer); +} + +/* + * Parse string representation of the server version. 
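+ * e.g. a major-only string such as "12" is encoded as 120000 (major * 10000).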
+ */ +uint32 +parse_server_version(const char *server_version_str) { int nfields; - int result = 0; + uint32 result = 0; int major_version = 0; int minor_version = 0; @@ -259,7 +478,31 @@ parse_server_version(char *server_version_str) result = major_version * 10000; } else - elog(ERROR, "Unknown server version format"); + elog(ERROR, "Unknown server version format %s", server_version_str); + + return result; +} + +/* + * Parse string representation of the program version. + */ +uint32 +parse_program_version(const char *program_version) +{ + int nfields; + int major = 0, + minor = 0, + micro = 0; + uint32 result = 0; + + if (program_version == NULL || program_version[0] == '\0') + return 0; + + nfields = sscanf(program_version, "%d.%d.%d", &major, &minor, µ); + if (nfields == 3) + result = major * 10000 + minor * 100 + micro; + else + elog(ERROR, "Unknown program version format %s", program_version); return result; } @@ -267,59 +510,123 @@ parse_server_version(char *server_version_str) const char * status2str(BackupStatus status) { - static const char *statusName[] = - { - "UNKNOWN", - "OK", - "ERROR", - "RUNNING", - "MERGING", - "DELETING", - "DELETED", - "DONE", - "ORPHAN", - "CORRUPT" - }; if (status < BACKUP_STATUS_INVALID || BACKUP_STATUS_CORRUPT < status) return "UNKNOWN"; return statusName[status]; } -void -remove_trailing_space(char *buf, int comment_mark) +BackupStatus +str2status(const char *status) { - int i; - char *last_char = NULL; + BackupStatus i; - for (i = 0; buf[i]; i++) + for (i = BACKUP_STATUS_INVALID; i <= BACKUP_STATUS_CORRUPT; i++) { - if (buf[i] == comment_mark || buf[i] == '\n' || buf[i] == '\r') - { - buf[i] = '\0'; - break; - } + if (pg_strcasecmp(status, statusName[i]) == 0) return i; } - for (i = 0; buf[i]; i++) + + return BACKUP_STATUS_INVALID; +} + +bool +datapagemap_is_set(datapagemap_t *map, BlockNumber blkno) +{ + int offset; + int bitno; + + offset = blkno / 8; + bitno = blkno % 8; + + /* enlarge or create bitmap if needed */ + if (map->bitmapsize <= offset) { - if (!isspace(buf[i])) - last_char = buf + i; + int oldsize = map->bitmapsize; + int newsize; + + /* + * The minimum to hold the new bit is offset + 1. But add some + * headroom, so that we don't need to repeatedly enlarge the bitmap in + * the common case that blocks are modified in order, from beginning + * of a relation to the end. + */ + newsize = offset + 1; + newsize += 10; + + map->bitmap = pg_realloc(map->bitmap, newsize); + + /* zero out the newly allocated region */ + memset(&map->bitmap[oldsize], 0, newsize - oldsize); + + map->bitmapsize = newsize; } - if (last_char != NULL) - *(last_char + 1) = '\0'; + //datapagemap_print(map); + + /* check the bit */ + return map->bitmap[offset] & (1 << bitno); } +/* + * A debugging aid. Prints out the contents of the page map. + */ void -remove_not_digit(char *buf, size_t len, const char *str) +datapagemap_print_debug(datapagemap_t *map) { - int i, j; + datapagemap_iterator_t *iter; + BlockNumber blocknum; - for (i = 0, j = 0; str[i] && j < len; i++) + iter = datapagemap_iterate(map); + while (datapagemap_next(iter, &blocknum)) + elog(INFO, " block %u", blocknum); + + pg_free(iter); +} + +/* + * Return pid of postmaster process running in given pgdata. + * Return 0 if there is none. + * Return 1 if postmaster.pid is mangled. 
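+ * Process liveness is probed with kill(pid, 0); ESRCH means it is gone.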
+ */ +pid_t +check_postmaster(const char *pgdata) +{ + FILE *fp; + pid_t pid; + char pid_file[MAXPGPATH]; + + snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pgdata); + + fp = fopen(pid_file, "r"); + if (fp == NULL) + { + /* No pid file, acceptable*/ + if (errno == ENOENT) + return 0; + else + elog(ERROR, "Cannot open file \"%s\": %s", + pid_file, strerror(errno)); + } + + if (fscanf(fp, "%i", &pid) != 1) { - if (!isdigit(str[i])) - continue; - buf[j++] = str[i]; + /* something is wrong with the file content */ + pid = 1; } - buf[j] = '\0'; + + if (pid > 1) + { + if (kill(pid, 0) != 0) + { + /* process no longer exists */ + if (errno == ESRCH) + pid = 0; + else + elog(ERROR, "Failed to send signal 0 to a process %d: %s", + pid, strerror(errno)); + } + } + + fclose(fp); + return pid; } diff --git a/src/utils/configuration.c b/src/utils/configuration.c new file mode 100644 index 000000000..1ef332ed5 --- /dev/null +++ b/src/utils/configuration.c @@ -0,0 +1,1488 @@ +/*------------------------------------------------------------------------- + * + * configuration.c: - function implementations to work with pg_probackup + * configurations. + * + * Copyright (c) 2017-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" +#include "configuration.h" +#include "logger.h" +#include "pgut.h" +#include "file.h" + +#include "datatype/timestamp.h" + +#include "getopt_long.h" + +#include + +#define MAXPG_LSNCOMPONENT 8 + +/* + * Unit conversion tables. + * + * Copied from guc.c. + */ +#define MAX_UNIT_LEN 3 /* length of longest recognized unit string */ + +typedef struct +{ + char unit[MAX_UNIT_LEN + 1]; /* unit, as a string, like "kB" or + * "min" */ + int base_unit; /* OPTION_UNIT_XXX */ + int multiplier; /* If positive, multiply the value with this + * for unit -> base_unit conversion. 
If + * negative, divide (with the absolute value) */ +} unit_conversion; + +static const char *memory_units_hint = "Valid units for this parameter are \"kB\", \"MB\", \"GB\", and \"TB\"."; + +static const unit_conversion memory_unit_conversion_table[] = +{ + {"TB", OPTION_UNIT_KB, 1024 * 1024 * 1024}, + {"GB", OPTION_UNIT_KB, 1024 * 1024}, + {"MB", OPTION_UNIT_KB, 1024}, + {"KB", OPTION_UNIT_KB, 1}, + {"kB", OPTION_UNIT_KB, 1}, + + {"TB", OPTION_UNIT_BLOCKS, (1024 * 1024 * 1024) / (BLCKSZ / 1024)}, + {"GB", OPTION_UNIT_BLOCKS, (1024 * 1024) / (BLCKSZ / 1024)}, + {"MB", OPTION_UNIT_BLOCKS, 1024 / (BLCKSZ / 1024)}, + {"kB", OPTION_UNIT_BLOCKS, -(BLCKSZ / 1024)}, + + {"TB", OPTION_UNIT_XBLOCKS, (1024 * 1024 * 1024) / (XLOG_BLCKSZ / 1024)}, + {"GB", OPTION_UNIT_XBLOCKS, (1024 * 1024) / (XLOG_BLCKSZ / 1024)}, + {"MB", OPTION_UNIT_XBLOCKS, 1024 / (XLOG_BLCKSZ / 1024)}, + {"kB", OPTION_UNIT_XBLOCKS, -(XLOG_BLCKSZ / 1024)}, + + {""} /* end of table marker */ +}; + +static const char *time_units_hint = "Valid units for this parameter are \"ms\", \"s\", \"min\", \"h\", and \"d\"."; + +static const unit_conversion time_unit_conversion_table[] = +{ + {"d", OPTION_UNIT_MS, 1000 * 60 * 60 * 24}, + {"h", OPTION_UNIT_MS, 1000 * 60 * 60}, + {"min", OPTION_UNIT_MS, 1000 * 60}, + {"s", OPTION_UNIT_MS, 1000}, + {"ms", OPTION_UNIT_MS, 1}, + + {"d", OPTION_UNIT_S, 60 * 60 * 24}, + {"h", OPTION_UNIT_S, 60 * 60}, + {"min", OPTION_UNIT_S, 60}, + {"s", OPTION_UNIT_S, 1}, + {"ms", OPTION_UNIT_S, -1000}, + + {"d", OPTION_UNIT_MIN, 60 * 24}, + {"h", OPTION_UNIT_MIN, 60}, + {"min", OPTION_UNIT_MIN, 1}, + {"s", OPTION_UNIT_MIN, -60}, + {"ms", OPTION_UNIT_MIN, -1000 * 60}, + + {""} /* end of table marker */ +}; + +/* + * Reading functions. + */ + +static uint32 +option_length(const ConfigOption opts[]) +{ + uint32 len; + + for (len = 0; opts && opts[len].type; len++) { } + + return len; +} + +static int +option_has_arg(char type) +{ + switch (type) + { + case 'b': + case 'B': + return no_argument;//optional_argument; + default: + return required_argument; + } +} + +static void +option_copy(struct option dst[], const ConfigOption opts[], size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + { + dst[i].name = opts[i].lname; + dst[i].has_arg = option_has_arg(opts[i].type); + dst[i].flag = NULL; + dst[i].val = opts[i].sname; + } +} + +static ConfigOption * +option_find(int c, ConfigOption opts1[]) +{ + size_t i; + + for (i = 0; opts1 && opts1[i].type; i++) + if (opts1[i].sname == c) + return &opts1[i]; + + return NULL; /* not found */ +} + +static char * +longopts_to_optstring(const struct option opts[], const size_t len) +{ + size_t i; + char *result; + char *s; + + result = pgut_malloc(len * 2 + 1); + + s = result; + for (i = 0; i < len; i++) + { + if (!isprint(opts[i].val)) + continue; + *s++ = opts[i].val; + if (opts[i].has_arg != no_argument) + *s++ = ':'; + } + *s = '\0'; + + return result; +} + +/* + * Compare two strings ignore cases and ignore. + */ +static bool +key_equals(const char *lhs, const char *rhs) +{ + for (; *lhs && *rhs; lhs++, rhs++) + { + if (strchr("-_ ", *lhs)) + { + if (!strchr("-_ ", *rhs)) + return false; + } + else if (ToLower(*lhs) != ToLower(*rhs)) + return false; + } + + return *lhs == '\0' && *rhs == '\0'; +} + +static void +assign_option(ConfigOption *opt, const char *optarg, OptionSource src) +{ + const char *message; + + if (opt == NULL) + elog(ERROR, "Option is not found. 
Try \"%s --help\" for more information.\n", + PROGRAM_NAME); + + if (opt->source > src) + { + /* high prior value has been set already. */ + return; + } + /* Allow duplicate entries for function option */ + else if (src >= SOURCE_CMD && opt->source >= src && opt->type != 'f') + { + message = "specified only once"; + } + else + { + OptionSource orig_source = opt->source; + + /* can be overwritten if non-command line source */ + opt->source = src; + + switch (opt->type) + { + case 'b': + case 'B': + if (optarg == NULL) + { + *((bool *) opt->var) = (opt->type == 'b'); + return; + } + else if (parse_bool(optarg, (bool *) opt->var)) + { + return; + } + message = "a boolean"; + break; + case 'f': + ((option_assign_fn) opt->var)(opt, optarg); + return; + case 'i': + if (parse_int32(optarg, opt->var, opt->flags)) + return; + message = "a 32bit signed integer"; + break; + case 'u': + if (parse_uint32(optarg, opt->var, opt->flags)) + return; + message = "a 32bit unsigned integer"; + break; + case 'I': + if (parse_int64(optarg, opt->var, opt->flags)) + return; + message = "a 64bit signed integer"; + break; + case 'U': + if (parse_uint64(optarg, opt->var, opt->flags)) + return; + message = "a 64bit unsigned integer"; + break; + case 's': + if (orig_source != SOURCE_DEFAULT) + free(*(char **) opt->var); + + /* 'none' and 'off' are always disable the string parameter */ + //if (optarg && (pg_strcasecmp(optarg, "none") == 0)) + //{ + // *(char **) opt->var = "none"; + // return; + //} + + *(char **) opt->var = pgut_strdup(optarg); + if (strcmp(optarg,"") != 0) + return; + message = "a valid string"; + break; + case 't': + if (parse_time(optarg, opt->var, + opt->source == SOURCE_FILE)) + return; + message = "a time"; + break; + default: + elog(ERROR, "Invalid option type: %c", opt->type); + return; /* keep compiler quiet */ + } + } + + if (optarg) + { + if (isprint(opt->sname)) + elog(ERROR, "Option -%c, --%s should be %s: '%s'", + opt->sname, opt->lname, message, optarg); + else + elog(ERROR, "Option --%s should be %s: '%s'", + opt->lname, message, optarg); + } + else + { + if (isprint(opt->sname)) + elog(ERROR, "Option -%c, --%s should be %s", + opt->sname, opt->lname, message); + else + elog(ERROR, "Option --%s should be %s", + opt->lname, message); + } +} + +static const char * +skip_space(const char *str, const char *line) +{ + while (IsSpace(*str)) { str++; } + return str; +} + +static const char * +get_next_token(const char *src, char *dst, const char *line) +{ + const char *s; + int i; + int j; + + if ((s = skip_space(src, line)) == NULL) + return NULL; + + /* parse quoted string */ + if (*s == '\'') + { + s++; + for (i = 0, j = 0; s[i] != '\0'; i++) + { + if (s[i] == '\'') + { + i++; + /* doubled quote becomes just one quote */ + if (s[i] == '\'') + dst[j] = s[i]; + else + break; + } + else + dst[j] = s[i]; + j++; + } + } + else + { + i = j = strcspn(s, "#\n\r\t\v"); + memcpy(dst, s, j); + } + + dst[j] = '\0'; + return s + i; +} + +static bool +parse_pair(const char buffer[], char key[], char value[]) +{ + const char *start; + const char *end; + + key[0] = value[0] = '\0'; + + /* + * parse key + */ + start = buffer; + if ((start = skip_space(start, buffer)) == NULL) + return false; + + end = start + strcspn(start, "=# \n\r\t\v"); + + /* skip blank buffer */ + if (end - start <= 0) + { + if (*start == '=') + elog(ERROR, "Syntax error in \"%s\"", buffer); + return false; + } + + /* key found */ + strncpy(key, start, end - start); + key[end - start] = '\0'; + + /* find key and value split char */ + 
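+ /* (the only accepted separator is '='; anything else is a syntax error) */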
if ((start = skip_space(end, buffer)) == NULL) + return false; + + if (*start != '=') + { + elog(ERROR, "Syntax error in \"%s\"", buffer); + return false; + } + + start++; + + /* + * parse value + */ + if ((end = get_next_token(start, value, buffer)) == NULL) + return false; + + if ((start = skip_space(end, buffer)) == NULL) + return false; + + if (*start != '\0' && *start != '#') + { + elog(ERROR, "Syntax error in \"%s\"", buffer); + return false; + } + + return true; +} + +/* + * Returns the current user name. + */ +static const char * +get_username(void) +{ + const char *ret; + +#ifndef WIN32 + struct passwd *pw; + + pw = getpwuid(geteuid()); + ret = (pw ? pw->pw_name : NULL); +#else + static char username[128]; /* remains after function exit */ + DWORD len = sizeof(username) - 1; + + if (GetUserName(username, &len)) + ret = username; + else + { + _dosmaperr(GetLastError()); + ret = NULL; + } +#endif + + if (ret == NULL) + elog(ERROR, "Could not get current user name: %s", strerror(errno)); + return ret; +} + +/* + * Process options passed from command line. + * TODO: currectly argument parsing treat missing argument for options + * as invalid option + */ +int +config_get_opt(int argc, char **argv, ConfigOption cmd_options[], + ConfigOption options[]) +{ + int c; + int optindex = 0; + char *optstring; + struct option *longopts; + uint32 cmd_len, + len; + + cmd_len = option_length(cmd_options); + len = option_length(options); + + longopts = pgut_newarray(struct option, + cmd_len + len + 1 /* zero/end option */); + + /* Concatenate two options */ + option_copy(longopts, cmd_options, cmd_len); + option_copy(longopts + cmd_len, options, len + 1); + + optstring = longopts_to_optstring(longopts, cmd_len + len); + + /* Assign named options */ + while ((c = getopt_long(argc, argv, optstring, longopts, &optindex)) != -1) + { + ConfigOption *opt; + + opt = option_find(c, cmd_options); + if (opt == NULL) + opt = option_find(c, options); + + if (opt + && !remote_agent + && opt->allowed < SOURCE_CMD && opt->allowed != SOURCE_CMD_STRICT) + elog(ERROR, "Option %s cannot be specified in command line", + opt->lname); + /* Check 'opt == NULL' is performed in assign_option() */ + assign_option(opt, optarg, SOURCE_CMD); + } + + return optind; +} + +/* + * Get configuration from configuration file. + * Return number of parsed options. 
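+ * Lines are parsed as 'key = value' pairs; with strict enabled an unknown
+ * key is reported at the given elevel.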
+ */ +int +config_read_opt(const char *path, ConfigOption options[], int elevel, + bool strict, bool missing_ok) +{ + FILE *fp; + char buf[1024]; + char key[1024]; + char value[1024]; + int parsed_options = 0; + + if (!options) + return parsed_options; + + if ((fp = pgut_fopen(path, "rt", missing_ok)) == NULL) + return parsed_options; + + while (fgets(buf, lengthof(buf), fp)) + { + size_t i; + + for (i = strlen(buf); i > 0 && IsSpace(buf[i - 1]); i--) + buf[i - 1] = '\0'; + + if (parse_pair(buf, key, value)) + { + for (i = 0; options[i].type; i++) + { + ConfigOption *opt = &options[i]; + + if (key_equals(key, opt->lname)) + { + if (opt->allowed < SOURCE_FILE && + opt->allowed != SOURCE_FILE_STRICT) + elog(elevel, "Option %s cannot be specified in file", + opt->lname); + else if (opt->source <= SOURCE_FILE) + { + assign_option(opt, value, SOURCE_FILE); + parsed_options++; + } + break; + } + } + if (strict && !options[i].type) + elog(elevel, "Invalid option \"%s\" in file \"%s\"", key, path); + } + } + + if (ferror(fp)) + elog(ERROR, "Failed to read from file: \"%s\"", path); + + fio_close_stream(fp); + + return parsed_options; +} + +/* + * Process options passed as environment variables. + */ +void +config_get_opt_env(ConfigOption options[]) +{ + size_t i; + + for (i = 0; options && options[i].type; i++) + { + ConfigOption *opt = &options[i]; + const char *value = NULL; + + /* If option was already set do not check env */ + if (opt->source > SOURCE_ENV || opt->allowed < SOURCE_ENV) + continue; + + if (strcmp(opt->lname, "pgdata") == 0) + value = getenv("PGDATA"); + if (strcmp(opt->lname, "port") == 0) + value = getenv("PGPORT"); + if (strcmp(opt->lname, "host") == 0) + value = getenv("PGHOST"); + if (strcmp(opt->lname, "username") == 0) + value = getenv("PGUSER"); + if (strcmp(opt->lname, "pgdatabase") == 0) + { + value = getenv("PGDATABASE"); + if (value == NULL) + value = getenv("PGUSER"); + if (value == NULL) + value = get_username(); + } + + if (value) + assign_option(opt, value, SOURCE_ENV); + } +} + +/* + * Manually set source of the option. Find it by the pointer var. + */ +void +config_set_opt(ConfigOption options[], void *var, OptionSource source) +{ + int i; + + for (i = 0; options[i].type; i++) + { + ConfigOption *opt = &options[i]; + + if (opt->var == var) + { + if ((opt->allowed == SOURCE_FILE_STRICT && source != SOURCE_FILE) || + (opt->allowed == SOURCE_CMD_STRICT && source != SOURCE_CMD) || + (opt->allowed < source && opt->allowed >= SOURCE_ENV)) + elog(ERROR, "Invalid option source %d for %s", + source, opt->lname); + + opt->source = source; + break; + } + } +} + +/* + * Return value of the function in the string representation. Result is + * allocated string. + */ +char * +option_get_value(ConfigOption *opt) +{ + int64 value = 0; + uint64 value_u = 0; + const char *unit = NULL; + + /* + * If it is defined a unit for the option get readable value from base with + * unit name. 
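+ * e.g. an interval stored in base seconds as 300 is rendered as "5min".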
+ */ + if (opt->flags & OPTION_UNIT) + { + if (opt->type == 'i') + convert_from_base_unit(*((int32 *) opt->var), + opt->flags & OPTION_UNIT, &value, &unit); + else if (opt->type == 'i') + convert_from_base_unit(*((int64 *) opt->var), + opt->flags & OPTION_UNIT, &value, &unit); + else if (opt->type == 'u') + convert_from_base_unit_u(*((uint32 *) opt->var), + opt->flags & OPTION_UNIT, &value_u, &unit); + else if (opt->type == 'U') + convert_from_base_unit_u(*((uint64 *) opt->var), + opt->flags & OPTION_UNIT, &value_u, &unit); + } + + /* Get string representation itself */ + switch (opt->type) + { + case 'b': + case 'B': + return psprintf("%s", *((bool *) opt->var) ? "true" : "false"); + case 'i': + if (opt->flags & OPTION_UNIT) + return psprintf(INT64_FORMAT "%s", value, unit); + else + return psprintf("%d", *((int32 *) opt->var)); + case 'u': + if (opt->flags & OPTION_UNIT) + return psprintf(UINT64_FORMAT "%s", value_u, unit); + else + return psprintf("%u", *((uint32 *) opt->var)); + case 'I': + if (opt->flags & OPTION_UNIT) + return psprintf(INT64_FORMAT "%s", value, unit); + else + return psprintf(INT64_FORMAT, *((int64 *) opt->var)); + case 'U': + if (opt->flags & OPTION_UNIT) + return psprintf(UINT64_FORMAT "%s", value_u, unit); + else + return psprintf(UINT64_FORMAT, *((uint64 *) opt->var)); + case 's': + if (*((char **) opt->var) == NULL) + return NULL; + /* 'none' and 'off' are always disable the string parameter */ + //if ((pg_strcasecmp(*((char **) opt->var), "none") == 0) || + // (pg_strcasecmp(*((char **) opt->var), "off") == 0)) + // return NULL; + return pstrdup(*((char **) opt->var)); + case 't': + { + char *timestamp; + time_t t = *((time_t *) opt->var); + + if (t > 0) + { + timestamp = palloc(100); + time2iso(timestamp, 100, t); + } + else + timestamp = palloc0(1 /* just null termination */); + return timestamp; + } + default: + elog(ERROR, "Invalid option type: %c", opt->type); + return NULL; /* keep compiler quiet */ + } +} + +/* + * Parsing functions + */ + +/* + * Convert a value from one of the human-friendly units ("kB", "min" etc.) + * to the given base unit. 'value' and 'unit' are the input value and unit + * to convert from. The converted value is stored in *base_value. + * + * Returns true on success, false if the input unit is not recognized. 
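+ * e.g. a value of 1 with unit "GB" and base unit OPTION_UNIT_KB yields 1048576.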
+ */ +static bool +convert_to_base_unit(int64 value, const char *unit, + int base_unit, int64 *base_value) +{ + const unit_conversion *table; + int i; + + if (base_unit & OPTION_UNIT_MEMORY) + table = memory_unit_conversion_table; + else + table = time_unit_conversion_table; + + for (i = 0; *table[i].unit; i++) + { + if (base_unit == table[i].base_unit && + strcmp(unit, table[i].unit) == 0) + { + if (table[i].multiplier < 0) + *base_value = value / (-table[i].multiplier); + else + { + /* Check for integer overflow first */ + if (value > PG_INT64_MAX / table[i].multiplier) + return false; + + *base_value = value * table[i].multiplier; + } + return true; + } + } + return false; +} + +/* + * Unsigned variant of convert_to_base_unit() + */ +static bool +convert_to_base_unit_u(uint64 value, const char *unit, + int base_unit, uint64 *base_value) +{ + const unit_conversion *table; + int i; + + if (base_unit & OPTION_UNIT_MEMORY) + table = memory_unit_conversion_table; + else + table = time_unit_conversion_table; + + for (i = 0; *table[i].unit; i++) + { + if (base_unit == table[i].base_unit && + strcmp(unit, table[i].unit) == 0) + { + if (table[i].multiplier < 0) + *base_value = value / (-table[i].multiplier); + else + { + /* Check for integer overflow first */ + if (value > PG_UINT64_MAX / table[i].multiplier) + return false; + + *base_value = value * table[i].multiplier; + } + return true; + } + } + return false; +} + +static bool +parse_unit(char *unit_str, int flags, int64 value, int64 *base_value) +{ + /* allow whitespace between integer and unit */ + while (isspace((unsigned char) *unit_str)) + unit_str++; + + /* Handle possible unit */ + if (*unit_str != '\0') + { + char unit[MAX_UNIT_LEN + 1]; + int unitlen; + bool converted = false; + + if ((flags & OPTION_UNIT) == 0) + return false; /* this setting does not accept a unit */ + + unitlen = 0; + while (*unit_str != '\0' && !isspace((unsigned char) *unit_str) && + unitlen < MAX_UNIT_LEN) + unit[unitlen++] = *(unit_str++); + unit[unitlen] = '\0'; + /* allow whitespace after unit */ + while (isspace((unsigned char) *unit_str)) + unit_str++; + + if (*unit_str == '\0') + converted = convert_to_base_unit(value, unit, (flags & OPTION_UNIT), + base_value); + if (!converted) + return false; + } + + return true; +} + +/* + * Unsigned variant of parse_unit() + */ +static bool +parse_unit_u(char *unit_str, int flags, uint64 value, uint64 *base_value) +{ + /* allow whitespace between integer and unit */ + while (isspace((unsigned char) *unit_str)) + unit_str++; + + /* Handle possible unit */ + if (*unit_str != '\0') + { + char unit[MAX_UNIT_LEN + 1]; + int unitlen; + bool converted = false; + + if ((flags & OPTION_UNIT) == 0) + return false; /* this setting does not accept a unit */ + + unitlen = 0; + while (*unit_str != '\0' && !isspace((unsigned char) *unit_str) && + unitlen < MAX_UNIT_LEN) + unit[unitlen++] = *(unit_str++); + unit[unitlen] = '\0'; + /* allow whitespace after unit */ + while (isspace((unsigned char) *unit_str)) + unit_str++; + + if (*unit_str == '\0') + converted = convert_to_base_unit_u(value, unit, + (flags & OPTION_UNIT), + base_value); + if (!converted) + return false; + } + + return true; +} + +/* + * Try to interpret value as boolean value. Valid values are: true, + * false, yes, no, on, off, 1, 0; as well as unique prefixes thereof. + * If the string parses okay, return true, else false. + * If okay and result is not NULL, return the value in *result. 
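+ * e.g. "t", "ON" and "1" all parse as true; "off" and "0" parse as false.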
+ */ +bool +parse_bool(const char *value, bool *result) +{ + return parse_bool_with_len(value, strlen(value), result); +} + +bool +parse_bool_with_len(const char *value, size_t len, bool *result) +{ + switch (*value) + { + case 't': + case 'T': + if (pg_strncasecmp(value, "true", len) == 0) + { + if (result) + *result = true; + return true; + } + break; + case 'f': + case 'F': + if (pg_strncasecmp(value, "false", len) == 0) + { + if (result) + *result = false; + return true; + } + break; + case 'y': + case 'Y': + if (pg_strncasecmp(value, "yes", len) == 0) + { + if (result) + *result = true; + return true; + } + break; + case 'n': + case 'N': + if (pg_strncasecmp(value, "no", len) == 0) + { + if (result) + *result = false; + return true; + } + break; + case 'o': + case 'O': + /* 'o' is not unique enough */ + if (pg_strncasecmp(value, "on", (len > 2 ? len : 2)) == 0) + { + if (result) + *result = true; + return true; + } + else if (pg_strncasecmp(value, "off", (len > 2 ? len : 2)) == 0) + { + if (result) + *result = false; + return true; + } + break; + case '1': + if (len == 1) + { + if (result) + *result = true; + return true; + } + break; + case '0': + if (len == 1) + { + if (result) + *result = false; + return true; + } + break; + default: + break; + } + + if (result) + *result = false; /* suppress compiler warning */ + return false; +} + +/* + * Parse string as 32bit signed int. + * valid range: -2147483648 ~ 2147483647 + */ +bool +parse_int32(const char *value, int32 *result, int flags) +{ + int64 val; + char *endptr; + + if (strcmp(value, INFINITE_STR) == 0) + { + *result = PG_INT32_MAX; + return true; + } + + errno = 0; + val = strtol(value, &endptr, 0); + if (endptr == value || (*endptr && flags == 0)) + return false; + + /* Check for integer overflow */ + if (errno == ERANGE || val != (int64) ((int32) val)) + return false; + + if (!parse_unit(endptr, flags, val, &val)) + return false; + + /* Check for integer overflow again */ + if (val != (int64) ((int32) val)) + return false; + + *result = val; + + return true; +} + +/* + * Parse string as 32bit unsigned int. 
+ * valid range: 0 ~ 4294967295 (2^32-1) + */ +bool +parse_uint32(const char *value, uint32 *result, int flags) +{ + uint64 val; + char *endptr; + + if (strcmp(value, INFINITE_STR) == 0) + { + *result = PG_UINT32_MAX; + return true; + } + + errno = 0; + val = strtoul(value, &endptr, 0); + if (endptr == value || (*endptr && flags == 0)) + return false; + + /* Check for integer overflow */ + if (errno == ERANGE || val != (uint64) ((uint32) val)) + return false; + + if (!parse_unit_u(endptr, flags, val, &val)) + return false; + + /* Check for integer overflow again */ + if (val != (uint64) ((uint32) val)) + return false; + + *result = val; + + return true; +} + +/* + * Parse string as int64 + * valid range: -9223372036854775808 ~ 9223372036854775807 + */ +bool +parse_int64(const char *value, int64 *result, int flags) +{ + int64 val; + char *endptr; + + if (strcmp(value, INFINITE_STR) == 0) + { + *result = PG_INT64_MAX; + return true; + } + + errno = 0; +#if defined(HAVE_LONG_INT_64) + val = strtol(value, &endptr, 0); +#elif defined(HAVE_LONG_LONG_INT_64) + val = strtoll(value, &endptr, 0); +#else + val = strtol(value, &endptr, 0); +#endif + if (endptr == value || (*endptr && flags == 0)) + return false; + + if (errno == ERANGE) + return false; + + if (!parse_unit(endptr, flags, val, &val)) + return false; + + *result = val; + + return true; +} + +/* + * Parse string as uint64 + * valid range: 0 ~ (2^64-1) + */ +bool +parse_uint64(const char *value, uint64 *result, int flags) +{ + uint64 val; + char *endptr; + + if (strcmp(value, INFINITE_STR) == 0) + { + *result = PG_UINT64_MAX; + return true; + } + + errno = 0; +#if defined(HAVE_LONG_INT_64) + val = strtoul(value, &endptr, 0); +#elif defined(HAVE_LONG_LONG_INT_64) + val = strtoull(value, &endptr, 0); +#else + val = strtoul(value, &endptr, 0); +#endif + if (endptr == value || (*endptr && flags == 0)) + return false; + + if (errno == ERANGE) + return false; + + if (!parse_unit_u(endptr, flags, val, &val)) + return false; + + *result = val; + + return true; +} + +/* + * Convert ISO-8601 format string to time_t value. + * + * If utc_default is true, then if timezone offset isn't specified tz will be + * +00:00. + * + * TODO: '0' converted into '2000-01-01 00:00:00'. Example: set-backup --expire-time=0 + */ +bool +parse_time(const char *value, time_t *result, bool utc_default) +{ + size_t len; + int fields_num, + tz = 0, + i; + bool tz_set = false; + char *tmp; + struct tm tm; + char junk[2]; + + /* tmp = replace( value, !isalnum, ' ' ) */ + tmp = pgut_malloc(strlen(value) + + 1); + len = 0; + fields_num = 1; + + while (*value) + { + if (IsAlnum(*value)) + { + tmp[len++] = *value; + value++; + } + else if (fields_num < 6) + { + fields_num++; + tmp[len++] = ' '; + value++; + } + /* timezone field is 7th */ + else if ((*value == '-' || *value == '+') && fields_num == 6) + { + int hr, + min, + sec = 0; + char *cp; + + errno = 0; + hr = strtol(value + 1, &cp, 10); + if ((value + 1) == cp || errno == ERANGE) + return false; + + /* explicit delimiter? */ + if (*cp == ':') + { + errno = 0; + min = strtol(cp + 1, &cp, 10); + if (errno == ERANGE) + return false; + if (*cp == ':') + { + errno = 0; + sec = strtol(cp + 1, &cp, 10); + if (errno == ERANGE) + return false; + } + } + /* otherwise, might have run things together... 
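+ * (e.g. "+0300" ends up here and is split into hr = 3, min = 0)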
*/ + else if (*cp == '\0' && strlen(value) > 3) + { + min = hr % 100; + hr = hr / 100; + /* we could, but don't, support a run-together hhmmss format */ + } + else + min = 0; + + /* Range-check the values; see notes in datatype/timestamp.h */ + if (hr < 0 || hr > MAX_TZDISP_HOUR) + return false; + if (min < 0 || min >= MINS_PER_HOUR) + return false; + if (sec < 0 || sec >= SECS_PER_MINUTE) + return false; + + tz = (hr * MINS_PER_HOUR + min) * SECS_PER_MINUTE + sec; + if (*value == '-') + tz = -tz; + + tz_set = true; + + fields_num++; + value = cp; + } + /* wrong format */ + else if (!IsSpace(*value)) + return false; + else + value++; + } + tmp[len] = '\0'; + + /* parse for "YYYY-MM-DD HH:MI:SS" */ + memset(&tm, 0, sizeof(tm)); + tm.tm_year = 0; /* tm_year is year - 1900 */ + tm.tm_mon = 0; /* tm_mon is 0 - 11 */ + tm.tm_mday = 1; /* tm_mday is 1 - 31 */ + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + i = sscanf(tmp, "%04d %02d %02d %02d %02d %02d%1s", + &tm.tm_year, &tm.tm_mon, &tm.tm_mday, + &tm.tm_hour, &tm.tm_min, &tm.tm_sec, junk); + free(tmp); + + if (i < 3 || i > 6) + return false; + + /* adjust year */ + if (tm.tm_year < 100) + tm.tm_year += 2000 - 1900; + else if (tm.tm_year >= 1900) + tm.tm_year -= 1900; + + /* adjust month */ + if (i > 1) + tm.tm_mon -= 1; + + /* determine whether Daylight Saving Time is in effect */ + tm.tm_isdst = -1; + + *result = mktime(&tm); + + /* adjust time zone */ + if (tz_set || utc_default) + { + time_t ltime = time(NULL); + struct tm *ptm = gmtime(<ime); + time_t gmt = mktime(ptm); + time_t offset; + + /* UTC time */ + *result -= tz; + + /* Get local time */ + ptm = localtime(<ime); + offset = ltime - gmt + (ptm->tm_isdst ? 3600 : 0); + + *result += offset; + } + + return true; +} + +/* + * Try to parse value as an integer. The accepted formats are the + * usual decimal, octal, or hexadecimal formats, optionally followed by + * a unit name if "flags" indicates a unit is allowed. + * + * If the string parses okay, return true, else false. + * If okay and result is not NULL, return the value in *result. + * If not okay and hintmsg is not NULL, *hintmsg is set to a suitable + * HINT message, or NULL if no hint provided. 
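+ *
+ * A minimal usage sketch (the literal value and the kilobyte-based
+ * OPTION_UNIT_KB flag below are only an illustration):
+ *
+ *		int			val;
+ *		const char *hint = NULL;
+ *
+ *		if (!parse_int("512MB", &val, OPTION_UNIT_KB, &hint))
+ *			elog(ERROR, "Invalid value%s%s",
+ *				 hint ? ": " : "", hint ? hint : "");
+ *		// on success val == 524288, i.e. 512MB expressed in kB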
+ */ +bool +parse_int(const char *value, int *result, int flags, const char **hintmsg) +{ + int64 val; + char *endptr; + + /* To suppress compiler warnings, always set output params */ + if (result) + *result = 0; + if (hintmsg) + *hintmsg = NULL; + + /* We assume here that int64 is at least as wide as long */ + errno = 0; + val = strtol(value, &endptr, 0); + + if (endptr == value) + return false; /* no HINT for integer syntax error */ + + if (errno == ERANGE || val != (int64) ((int32) val)) + { + if (hintmsg) + *hintmsg = "Value exceeds integer range."; + return false; + } + + /* allow whitespace between integer and unit */ + while (isspace((unsigned char) *endptr)) + endptr++; + + /* Handle possible unit */ + if (*endptr != '\0') + { + char unit[MAX_UNIT_LEN + 1]; + int unitlen; + bool converted = false; + + if ((flags & OPTION_UNIT) == 0) + return false; /* this setting does not accept a unit */ + + unitlen = 0; + while (*endptr != '\0' && !isspace((unsigned char) *endptr) && + unitlen < MAX_UNIT_LEN) + unit[unitlen++] = *(endptr++); + unit[unitlen] = '\0'; + /* allow whitespace after unit */ + while (isspace((unsigned char) *endptr)) + endptr++; + + if (*endptr == '\0') + converted = convert_to_base_unit(val, unit, (flags & OPTION_UNIT), + &val); + if (!converted) + { + /* invalid unit, or garbage after the unit; set hint and fail. */ + if (hintmsg) + { + if (flags & OPTION_UNIT_MEMORY) + *hintmsg = memory_units_hint; + else + *hintmsg = time_units_hint; + } + return false; + } + + /* Check for overflow due to units conversion */ + if (val != (int64) ((int32) val)) + { + if (hintmsg) + *hintmsg = "Value exceeds integer range."; + return false; + } + } + + if (result) + *result = (int) val; + return true; +} + +bool +parse_lsn(const char *value, XLogRecPtr *result) +{ + uint32 xlogid; + uint32 xrecoff; + int len1; + int len2; + + len1 = strspn(value, "0123456789abcdefABCDEF"); + if (len1 < 1 || len1 > MAXPG_LSNCOMPONENT || value[len1] != '/') + elog(ERROR, "invalid LSN \"%s\"", value); + len2 = strspn(value + len1 + 1, "0123456789abcdefABCDEF"); + if (len2 < 1 || len2 > MAXPG_LSNCOMPONENT || value[len1 + 1 + len2] != '\0') + elog(ERROR, "invalid LSN \"%s\"", value); + + if (sscanf(value, "%X/%X", &xlogid, &xrecoff) == 2) + *result = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; + else + { + elog(ERROR, "invalid LSN \"%s\"", value); + return false; + } + + return true; +} + +/* + * Convert a value in some base unit to a human-friendly unit. The output + * unit is chosen so that it's the greatest unit that can represent the value + * without loss. For example, if the base unit is GUC_UNIT_KB, 1024 is + * converted to 1 MB, but 1025 is represented as 1025 kB. + */ +void +convert_from_base_unit(int64 base_value, int base_unit, + int64 *value, const char **unit) +{ + const unit_conversion *table; + int i; + + *unit = NULL; + + if (base_unit & OPTION_UNIT_MEMORY) + table = memory_unit_conversion_table; + else + table = time_unit_conversion_table; + + for (i = 0; *table[i].unit; i++) + { + if (base_unit == table[i].base_unit) + { + /* + * Accept the first conversion that divides the value evenly. We + * assume that the conversions for each base unit are ordered from + * greatest unit to the smallest! 
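+			 * A negative multiplier marks a unit smaller than the base
+			 * unit: the value is multiplied by -multiplier (the entry is
+			 * skipped if that would overflow) instead of divided.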
+ */ + if (table[i].multiplier < 0) + { + /* Check for integer overflow first */ + if (base_value > PG_INT64_MAX / (-table[i].multiplier)) + continue; + + *value = base_value * (-table[i].multiplier); + *unit = table[i].unit; + break; + } + else if (base_value % table[i].multiplier == 0) + { + *value = base_value / table[i].multiplier; + *unit = table[i].unit; + break; + } + } + } + + Assert(*unit != NULL); +} + +/* + * Unsigned variant of convert_from_base_unit() + */ +void +convert_from_base_unit_u(uint64 base_value, int base_unit, + uint64 *value, const char **unit) +{ + const unit_conversion *table; + int i; + + *unit = NULL; + + if (base_unit & OPTION_UNIT_MEMORY) + table = memory_unit_conversion_table; + else + table = time_unit_conversion_table; + + for (i = 0; *table[i].unit; i++) + { + if (base_unit == table[i].base_unit) + { + /* + * Accept the first conversion that divides the value evenly. We + * assume that the conversions for each base unit are ordered from + * greatest unit to the smallest! + */ + if (table[i].multiplier < 0) + { + /* Check for integer overflow first */ + if (base_value > PG_UINT64_MAX / (-table[i].multiplier)) + continue; + + *value = base_value * (-table[i].multiplier); + *unit = table[i].unit; + break; + } + else if (base_value % table[i].multiplier == 0) + { + *value = base_value / table[i].multiplier; + *unit = table[i].unit; + break; + } + } + } + + Assert(*unit != NULL); +} + +/* + * Convert time_t value to ISO-8601 format string. Always set timezone offset. + */ +void +time2iso(char *buf, size_t len, time_t time) +{ + struct tm *ptm = gmtime(&time); + time_t gmt = mktime(ptm); + time_t offset; + char *ptr = buf; + + ptm = localtime(&time); + offset = time - gmt + (ptm->tm_isdst ? 3600 : 0); + + strftime(ptr, len, "%Y-%m-%d %H:%M:%S", ptm); + + ptr += strlen(ptr); + snprintf(ptr, len - (ptr - buf), "%c%02d", + (offset >= 0) ? '+' : '-', + abs((int) offset) / SECS_PER_HOUR); + + if (abs((int) offset) % SECS_PER_HOUR != 0) + { + ptr += strlen(ptr); + snprintf(ptr, len - (ptr - buf), ":%02d", + abs((int) offset % SECS_PER_HOUR) / SECS_PER_MINUTE); + } +} diff --git a/src/utils/configuration.h b/src/utils/configuration.h new file mode 100644 index 000000000..46b5d6c1b --- /dev/null +++ b/src/utils/configuration.h @@ -0,0 +1,106 @@ +/*------------------------------------------------------------------------- + * + * configuration.h: - prototypes of functions and structures for + * configuration. 
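+ *
+ * Rough usage sketch (the option entries and variable names below are
+ * invented for illustration; initializer order follows ConfigOption):
+ *
+ *		static int	num_threads = 1;
+ *		static bool	progress = false;
+ *
+ *		static ConfigOption cmd_options[] =
+ *		{
+ *			{ 'i', 'j', "threads",  &num_threads, SOURCE_CMD_STRICT },
+ *			{ 'b', 'P', "progress", &progress,    SOURCE_CMD_STRICT },
+ *			{ 0 }
+ *		};
+ *
+ *		// "options" stands for a second, instance-level table
+ *		config_get_opt(argc, argv, cmd_options, options);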
+ * + * Copyright (c) 2018-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#ifndef CONFIGURATION_H +#define CONFIGURATION_H + +#include "postgres_fe.h" +#include "access/xlogdefs.h" + +#define INFINITE_STR "INFINITE" + +typedef enum OptionSource +{ + SOURCE_DEFAULT, + SOURCE_FILE_STRICT, + SOURCE_CMD_STRICT, + SOURCE_ENV, + SOURCE_FILE, + SOURCE_CMD, + SOURCE_CONST +} OptionSource; + +typedef struct ConfigOption ConfigOption; + +typedef void (*option_assign_fn) (ConfigOption *opt, const char *arg); +/* Returns allocated string value */ +typedef char *(*option_get_fn) (ConfigOption *opt); + +/* + * type: + * b: bool (true) + * B: bool (false) + * f: option_fn + * i: 32bit signed integer + * u: 32bit unsigned integer + * I: 64bit signed integer + * U: 64bit unsigned integer + * s: string + * t: time_t + */ +struct ConfigOption +{ + char type; + uint8 sname; /* short name */ + const char *lname; /* long name */ + void *var; /* pointer to variable */ + OptionSource allowed; /* allowed source */ + OptionSource source; /* actual source */ + const char *group; /* option group name */ + int flags; /* option unit */ + option_get_fn get_value; /* function to get the value as a string, + should return allocated string*/ +}; + +/* + * bit values in "flags" of an option + */ +#define OPTION_UNIT_KB 0x1000 /* value is in kilobytes */ +#define OPTION_UNIT_BLOCKS 0x2000 /* value is in blocks */ +#define OPTION_UNIT_XBLOCKS 0x3000 /* value is in xlog blocks */ +#define OPTION_UNIT_XSEGS 0x4000 /* value is in xlog segments */ +#define OPTION_UNIT_MEMORY 0xF000 /* mask for size-related units */ + +#define OPTION_UNIT_MS 0x10000 /* value is in milliseconds */ +#define OPTION_UNIT_S 0x20000 /* value is in seconds */ +#define OPTION_UNIT_MIN 0x30000 /* value is in minutes */ +#define OPTION_UNIT_TIME 0xF0000 /* mask for time-related units */ + +#define OPTION_UNIT (OPTION_UNIT_MEMORY | OPTION_UNIT_TIME) + +extern int config_get_opt(int argc, char **argv, ConfigOption cmd_options[], + ConfigOption options[]); +extern int config_read_opt(const char *path, ConfigOption options[], int elevel, + bool strict, bool missing_ok); +extern void config_get_opt_env(ConfigOption options[]); +extern void config_set_opt(ConfigOption options[], void *var, + OptionSource source); + +extern char *option_get_value(ConfigOption *opt); + +extern bool parse_bool(const char *value, bool *result); +extern bool parse_bool_with_len(const char *value, size_t len, bool *result); +extern bool parse_int32(const char *value, int32 *result, int flags); +extern bool parse_uint32(const char *value, uint32 *result, int flags); +extern bool parse_int64(const char *value, int64 *result, int flags); +extern bool parse_uint64(const char *value, uint64 *result, int flags); +extern bool parse_time(const char *value, time_t *result, bool utc_default); +extern bool parse_int(const char *value, int *result, int flags, + const char **hintmsg); +extern bool parse_lsn(const char *value, XLogRecPtr *result); + +extern void time2iso(char *buf, size_t len, time_t time); + +extern void convert_from_base_unit(int64 base_value, int base_unit, + int64 *value, const char **unit); +extern void convert_from_base_unit_u(uint64 base_value, int base_unit, + uint64 *value, const char **unit); + +#endif /* CONFIGURATION_H */ diff --git a/src/utils/file.c b/src/utils/file.c new file mode 100644 index 000000000..b29a67070 --- /dev/null +++ b/src/utils/file.c @@ -0,0 +1,2727 @@ +#include +#include +#include 
+ +#include "pg_probackup.h" +#include "file.h" +#include "storage/checksum.h" + +#define PRINTF_BUF_SIZE 1024 +#define FILE_PERMISSIONS 0600 + +static __thread unsigned long fio_fdset = 0; +static __thread void* fio_stdin_buffer; +static __thread int fio_stdout = 0; +static __thread int fio_stdin = 0; +static __thread int fio_stderr = 0; + +fio_location MyLocation; + +typedef struct +{ + BlockNumber nblocks; + BlockNumber segmentno; + XLogRecPtr horizonLsn; + uint32 checksumVersion; + int calg; + int clevel; + int bitmapsize; + int path_len; +} fio_send_request; + + +typedef struct +{ + char path[MAXPGPATH]; + bool exclude; + bool follow_symlink; + bool add_root; + bool backup_logs; + bool exclusive_backup; + bool skip_hidden; + int external_dir_num; +} fio_list_dir_request; + +typedef struct +{ + mode_t mode; + size_t size; + time_t mtime; + bool is_datafile; + bool is_database; + Oid tblspcOid; + Oid dbOid; + Oid relOid; + ForkName forkName; + int segno; + int external_dir_num; + int linked_len; +} fio_pgFile; + +typedef struct +{ + BlockNumber n_blocks; + BlockNumber segmentno; + XLogRecPtr stop_lsn; + uint32 checksumVersion; +} fio_checksum_map_request; + +typedef struct +{ + BlockNumber n_blocks; + BlockNumber segmentno; + XLogRecPtr shift_lsn; + uint32 checksumVersion; +} fio_lsn_map_request; + + +/* Convert FIO pseudo handle to index in file descriptor array */ +#define fio_fileno(f) (((size_t)f - 1) | FIO_PIPE_MARKER) + +#if defined(WIN32) +#undef open(a, b, c) +#undef fopen(a, b) +#endif + +/* Use specified file descriptors as stdin/stdout for FIO functions */ +void fio_redirect(int in, int out, int err) +{ + fio_stdin = in; + fio_stdout = out; + fio_stderr = err; +} + +void fio_error(int rc, int size, char const* file, int line) +{ + if (remote_agent) + { + fprintf(stderr, "%s:%d: processed %d bytes instead of %d: %s\n", file, line, rc, size, rc >= 0 ? "end of data" : strerror(errno)); + exit(EXIT_FAILURE); + } + else + { + char buf[PRINTF_BUF_SIZE+1]; +// Assert(false); + int err_size = read(fio_stderr, buf, PRINTF_BUF_SIZE); + if (err_size > 0) + { + buf[err_size] = '\0'; + elog(ERROR, "Agent error: %s", buf); + } + else + elog(ERROR, "Communication error: %s", rc >= 0 ? "end of data" : strerror(errno)); + } +} + +/* Check if file descriptor is local or remote (created by FIO) */ +static bool fio_is_remote_fd(int fd) +{ + return (fd & FIO_PIPE_MARKER) != 0; +} + +#ifdef WIN32 + +#undef stat + +/* + * The stat() function in win32 is not guaranteed to update the st_size + * field when run. So we define our own version that uses the Win32 API + * to update this field. + */ +static int +fio_safestat(const char *path, struct stat *buf) +{ + int r; + WIN32_FILE_ATTRIBUTE_DATA attr; + + r = stat(path, buf); + if (r < 0) + return r; + + if (!GetFileAttributesEx(path, GetFileExInfoStandard, &attr)) + { + errno = ENOENT; + return -1; + } + + /* + * XXX no support for large files here, but we don't do that in general on + * Win32 yet. 
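+	 * Only nFileSizeLow is copied below, so a file larger than 4GB would
+	 * have only the low 32 bits of its size reported.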
+ */ + buf->st_size = attr.nFileSizeLow; + + return 0; +} + +#define stat(x, y) fio_safestat(x, y) + +/* TODO: use real pread on Linux */ +static ssize_t pread(int fd, void* buf, size_t size, off_t off) +{ + off_t rc = lseek(fd, off, SEEK_SET); + if (rc != off) + return -1; + return read(fd, buf, size); +} +static int remove_file_or_dir(char const* path) +{ + int rc = remove(path); +#ifdef WIN32 + if (rc < 0 && errno == EACCESS) + rc = rmdir(path); +#endif + return rc; +} +#else +#define remove_file_or_dir(path) remove(path) +#endif + +/* Check if specified location is local for current node */ +bool fio_is_remote(fio_location location) +{ + bool is_remote = MyLocation != FIO_LOCAL_HOST + && location != FIO_LOCAL_HOST + && location != MyLocation; + if (is_remote && !fio_stdin && !launch_agent()) + elog(ERROR, "Failed to establish SSH connection: %s", strerror(errno)); + return is_remote; +} + +/* Check if specified location is local for current node */ +bool fio_is_remote_simple(fio_location location) +{ + bool is_remote = MyLocation != FIO_LOCAL_HOST + && location != FIO_LOCAL_HOST + && location != MyLocation; + return is_remote; +} + +/* Try to read specified amount of bytes unless error or EOF are encountered */ +static ssize_t fio_read_all(int fd, void* buf, size_t size) +{ + size_t offs = 0; + while (offs < size) + { + ssize_t rc = read(fd, (char*)buf + offs, size - offs); + if (rc < 0) + { + if (errno == EINTR) + continue; + elog(ERROR, "fio_read_all error, fd %i: %s", fd, strerror(errno)); + return rc; + } + else if (rc == 0) + break; + + offs += rc; + } + return offs; +} + +/* Try to write specified amount of bytes unless error is encountered */ +static ssize_t fio_write_all(int fd, void const* buf, size_t size) +{ + size_t offs = 0; + while (offs < size) + { + ssize_t rc = write(fd, (char*)buf + offs, size - offs); + if (rc <= 0) + { + if (errno == EINTR) + continue; + + elog(ERROR, "fio_write_all error, fd %i: %s", fd, strerror(errno)); + + return rc; + } + offs += rc; + } + return offs; +} + +/* Get version of remote agent */ +int fio_get_agent_version(void) +{ + fio_header hdr; + hdr.cop = FIO_AGENT_VERSION; + hdr.size = 0; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + return hdr.arg; +} + +/* Open input stream. 
Remote file is fetched to the in-memory buffer and then accessed through Linux fmemopen */ +FILE* fio_open_stream(char const* path, fio_location location) +{ + FILE* f; + if (fio_is_remote(location)) + { + fio_header hdr; + hdr.cop = FIO_LOAD; + hdr.size = strlen(path) + 1; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, hdr.size), hdr.size); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_SEND); + if (hdr.size > 0) + { + Assert(fio_stdin_buffer == NULL); + fio_stdin_buffer = pgut_malloc(hdr.size); + IO_CHECK(fio_read_all(fio_stdin, fio_stdin_buffer, hdr.size), hdr.size); +#ifdef WIN32 + f = tmpfile(); + IO_CHECK(fwrite(f, 1, hdr.size, fio_stdin_buffer), hdr.size); + SYS_CHECK(fseek(f, 0, SEEK_SET)); +#else + f = fmemopen(fio_stdin_buffer, hdr.size, "r"); +#endif + } + else + { + f = NULL; + } + } + else + { + f = fopen(path, "rt"); + } + return f; +} + +/* Close input stream */ +int fio_close_stream(FILE* f) +{ + if (fio_stdin_buffer) + { + free(fio_stdin_buffer); + fio_stdin_buffer = NULL; + } + return fclose(f); +} + +/* Open directory */ +DIR* fio_opendir(char const* path, fio_location location) +{ + DIR* dir; + if (fio_is_remote(location)) + { + int i; + fio_header hdr; + unsigned long mask; + + mask = fio_fdset; + for (i = 0; (mask & 1) != 0; i++, mask >>= 1); + if (i == FIO_FDMAX) { + elog(ERROR, "Descriptor pool for remote files is exhausted, " + "probably too many remote directories are opened"); + } + hdr.cop = FIO_OPENDIR; + hdr.handle = i; + hdr.size = strlen(path) + 1; + fio_fdset |= 1 << i; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, hdr.size), hdr.size); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.arg != 0) + { + errno = hdr.arg; + fio_fdset &= ~(1 << hdr.handle); + return NULL; + } + dir = (DIR*)(size_t)(i + 1); + } + else + { + dir = opendir(path); + } + return dir; +} + +/* Get next directory entry */ +struct dirent* fio_readdir(DIR *dir) +{ + if (fio_is_remote_file((FILE*)dir)) + { + fio_header hdr; + static __thread struct dirent entry; + + hdr.cop = FIO_READDIR; + hdr.handle = (size_t)dir - 1; + hdr.size = 0; + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_SEND); + if (hdr.size) { + Assert(hdr.size == sizeof(entry)); + IO_CHECK(fio_read_all(fio_stdin, &entry, sizeof(entry)), sizeof(entry)); + } + + return hdr.size ? 
&entry : NULL; + } + else + { + return readdir(dir); + } +} + +/* Close directory */ +int fio_closedir(DIR *dir) +{ + if (fio_is_remote_file((FILE*)dir)) + { + fio_header hdr; + hdr.cop = FIO_CLOSEDIR; + hdr.handle = (size_t)dir - 1; + hdr.size = 0; + fio_fdset &= ~(1 << hdr.handle); + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + return 0; + } + else + { + return closedir(dir); + } +} + +/* Open file */ +int fio_open(char const* path, int mode, fio_location location) +{ + int fd; + if (fio_is_remote(location)) + { + int i; + fio_header hdr; + unsigned long mask; + + mask = fio_fdset; + for (i = 0; (mask & 1) != 0; i++, mask >>= 1); + if (i == FIO_FDMAX) + elog(ERROR, "Descriptor pool for remote files is exhausted, " + "probably too many remote files are opened"); + + hdr.cop = FIO_OPEN; + hdr.handle = i; + hdr.size = strlen(path) + 1; + hdr.arg = mode; +// hdr.arg = mode & ~O_EXCL; +// elog(INFO, "PATH: %s MODE: %i, %i", path, mode, O_EXCL); +// elog(INFO, "MODE: %i", hdr.arg); + fio_fdset |= 1 << i; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, hdr.size), hdr.size); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.arg != 0) + { + errno = hdr.arg; + fio_fdset &= ~(1 << hdr.handle); + return -1; + } + fd = i | FIO_PIPE_MARKER; + } + else + { + fd = open(path, mode, FILE_PERMISSIONS); + } + return fd; +} + + +/* Close ssh session */ +void +fio_disconnect(void) +{ + if (fio_stdin) + { + fio_header hdr; + hdr.cop = FIO_DISCONNECT; + hdr.size = 0; + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_DISCONNECTED); + SYS_CHECK(close(fio_stdin)); + SYS_CHECK(close(fio_stdout)); + fio_stdin = 0; + fio_stdout = 0; + wait_ssh(); + } +} + +/* Open stdio file */ +FILE* fio_fopen(char const* path, char const* mode, fio_location location) +{ + FILE *f = NULL; + + if (fio_is_remote(location)) + { + int flags = 0; + int fd; + if (strcmp(mode, PG_BINARY_W) == 0) { + flags = O_TRUNC|PG_BINARY|O_RDWR|O_CREAT; + } else if (strcmp(mode, "w") == 0) { + flags = O_TRUNC|O_RDWR|O_CREAT; + } else if (strcmp(mode, PG_BINARY_R) == 0) { + flags = O_RDONLY|PG_BINARY; + } else if (strcmp(mode, "r") == 0) { + flags = O_RDONLY; + } else if (strcmp(mode, PG_BINARY_R "+") == 0) { + /* stdio fopen("rb+") actually doesn't create unexisted file, but probackup frequently + * needs to open existed file or create new one if not exists. + * In stdio it can be done using two fopen calls: fopen("r+") and if failed then fopen("w"). + * But to eliminate extra call which especially critical in case of remote connection + * we change r+ semantic to create file if not exists. + */ + flags = O_RDWR|O_CREAT|PG_BINARY; + } else if (strcmp(mode, "r+") == 0) { /* see comment above */ + flags |= O_RDWR|O_CREAT; + } else if (strcmp(mode, "a") == 0) { + flags |= O_CREAT|O_RDWR|O_APPEND; + } else { + Assert(false); + } + fd = fio_open(path, flags, location); + if (fd >= 0) + f = (FILE*)(size_t)((fd + 1) & ~FIO_PIPE_MARKER); + } + else + { + f = fopen(path, mode); + if (f == NULL && strcmp(mode, PG_BINARY_R "+") == 0) + f = fopen(path, PG_BINARY_W); + } + return f; +} + +/* Format output to file stream */ +int fio_fprintf(FILE* f, char const* format, ...) 
+{ + int rc; + va_list args; + va_start (args, format); + if (fio_is_remote_file(f)) + { + char buf[PRINTF_BUF_SIZE]; +#ifdef HAS_VSNPRINTF + rc = vsnprintf(buf, sizeof(buf), format, args); +#else + rc = vsprintf(buf, format, args); +#endif + if (rc > 0) { + fio_fwrite(f, buf, rc); + } + } + else + { + rc = vfprintf(f, format, args); + } + va_end (args); + return rc; +} + +/* Flush stream data (does nothing for remote file) */ +int fio_fflush(FILE* f) +{ + int rc = 0; + if (!fio_is_remote_file(f)) + rc = fflush(f); + return rc; +} + +/* Sync file to the disk (does nothing for remote file) */ +int fio_flush(int fd) +{ + return fio_is_remote_fd(fd) ? 0 : fsync(fd); +} + +/* Close output stream */ +int fio_fclose(FILE* f) +{ + return fio_is_remote_file(f) + ? fio_close(fio_fileno(f)) + : fclose(f); +} + +/* Close file */ +int fio_close(int fd) +{ + if (fio_is_remote_fd(fd)) + { + fio_header hdr; + + hdr.cop = FIO_CLOSE; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = 0; + fio_fdset &= ~(1 << hdr.handle); + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + /* Note, that file is closed without waiting for confirmation */ + + return 0; + } + else + { + return close(fd); + } +} + +/* Truncate stdio file */ +int fio_ftruncate(FILE* f, off_t size) +{ + return fio_is_remote_file(f) + ? fio_truncate(fio_fileno(f), size) + : ftruncate(fileno(f), size); +} + +/* Truncate file */ +int fio_truncate(int fd, off_t size) +{ + if (fio_is_remote_fd(fd)) + { + fio_header hdr; + + hdr.cop = FIO_TRUNCATE; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = 0; + hdr.arg = size; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + + return 0; + } + else + { + return ftruncate(fd, size); + } +} + + +/* + * Read file from specified location. + */ +int fio_pread(FILE* f, void* buf, off_t offs) +{ + if (fio_is_remote_file(f)) + { + int fd = fio_fileno(f); + fio_header hdr; + + hdr.cop = FIO_PREAD; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = 0; + hdr.arg = offs; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_SEND); + if (hdr.size != 0) + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + + /* TODO: error handling */ + + return hdr.arg; + } + else + { + /* For local file, opened by fopen, we should use stdio functions */ + int rc = fseek(f, offs, SEEK_SET); + + if (rc < 0) + return rc; + + return fread(buf, 1, BLCKSZ, f); + } +} + +/* Set position in stdio file */ +int fio_fseek(FILE* f, off_t offs) +{ + return fio_is_remote_file(f) + ? fio_seek(fio_fileno(f), offs) + : fseek(f, offs, SEEK_SET); +} + +/* Set position in file */ +int fio_seek(int fd, off_t offs) +{ + if (fio_is_remote_fd(fd)) + { + fio_header hdr; + + hdr.cop = FIO_SEEK; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = 0; + hdr.arg = offs; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + + return 0; + } + else + { + return lseek(fd, offs, SEEK_SET); + } +} + +/* Write data to stdio file */ +size_t fio_fwrite(FILE* f, void const* buf, size_t size) +{ + return fio_is_remote_file(f) + ? 
fio_write(fio_fileno(f), buf, size) + : fwrite(buf, 1, size, f); +} + +/* Write data to the file */ +ssize_t fio_write(int fd, void const* buf, size_t size) +{ + if (fio_is_remote_fd(fd)) + { + fio_header hdr; + + hdr.cop = FIO_WRITE; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = size; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, buf, size), size); + + return size; + } + else + { + return write(fd, buf, size); + } +} + +int32 +fio_decompress(void* dst, void const* src, size_t size, int compress_alg) +{ + const char *errormsg = NULL; + int32 uncompressed_size = do_decompress(dst, BLCKSZ, + src, + size, + compress_alg, &errormsg); + if (uncompressed_size < 0 && errormsg != NULL) + { + elog(WARNING, "An error occured during decompressing block: %s", errormsg); + return -1; + } + + if (uncompressed_size != BLCKSZ) + { + elog(ERROR, "Page uncompressed to %d bytes != BLCKSZ", + uncompressed_size); + return -1; + } + return uncompressed_size; +} + +/* Write data to the file */ +ssize_t fio_fwrite_compressed(FILE* f, void const* buf, size_t size, int compress_alg) +{ + if (fio_is_remote_file(f)) + { + fio_header hdr; + + hdr.cop = FIO_WRITE_COMPRESSED; + hdr.handle = fio_fileno(f) & ~FIO_PIPE_MARKER; + hdr.size = size; + hdr.arg = compress_alg; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, buf, size), size); + + return size; + } + else + { + char uncompressed_buf[BLCKSZ]; + int32 uncompressed_size = fio_decompress(uncompressed_buf, buf, size, compress_alg); + + return (uncompressed_size < 0) + ? uncompressed_size + : fwrite(uncompressed_buf, 1, uncompressed_size, f); + } +} + +static ssize_t +fio_write_compressed_impl(int fd, void const* buf, size_t size, int compress_alg) +{ + char uncompressed_buf[BLCKSZ]; + int32 uncompressed_size = fio_decompress(uncompressed_buf, buf, size, compress_alg); + return fio_write_all(fd, uncompressed_buf, uncompressed_size); +} + +/* Read data from stdio file */ +ssize_t fio_fread(FILE* f, void* buf, size_t size) +{ + size_t rc; + if (fio_is_remote_file(f)) + return fio_read(fio_fileno(f), buf, size); + rc = fread(buf, 1, size, f); + return rc == 0 && !feof(f) ? -1 : rc; +} + +/* Read data from file */ +ssize_t fio_read(int fd, void* buf, size_t size) +{ + if (fio_is_remote_fd(fd)) + { + fio_header hdr; + + hdr.cop = FIO_READ; + hdr.handle = fd & ~FIO_PIPE_MARKER; + hdr.size = 0; + hdr.arg = size; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_SEND); + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + + return hdr.size; + } + else + { + return read(fd, buf, size); + } +} + +/* Get information about file */ +int fio_stat(char const* path, struct stat* st, bool follow_symlink, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + + hdr.cop = FIO_STAT; + hdr.handle = -1; + hdr.arg = follow_symlink; + hdr.size = path_len; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_STAT); + IO_CHECK(fio_read_all(fio_stdin, st, sizeof(*st)), sizeof(*st)); + + if (hdr.arg != 0) + { + errno = hdr.arg; + return -1; + } + return 0; + } + else + { + return follow_symlink ? 
stat(path, st) : lstat(path, st); + } +} + +/* Check presence of the file */ +int fio_access(char const* path, int mode, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + hdr.cop = FIO_ACCESS; + hdr.handle = -1; + hdr.size = path_len; + hdr.arg = mode; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_ACCESS); + + if (hdr.arg != 0) + { + errno = hdr.arg; + return -1; + } + return 0; + } + else + { + return access(path, mode); + } +} + +/* Create symbolic link */ +int fio_symlink(char const* target, char const* link_path, bool overwrite, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t target_len = strlen(target) + 1; + size_t link_path_len = strlen(link_path) + 1; + hdr.cop = FIO_SYMLINK; + hdr.handle = -1; + hdr.size = target_len + link_path_len; + hdr.arg = overwrite ? 1 : 0; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, target, target_len), target_len); + IO_CHECK(fio_write_all(fio_stdout, link_path, link_path_len), link_path_len); + + return 0; + } + else + { + if (overwrite) + remove_file_or_dir(link_path); + + return symlink(target, link_path); + } +} + +static void fio_symlink_impl(int out, char *buf, bool overwrite) +{ + char *linked_path = buf; + char *link_path = buf + strlen(buf) + 1; + + if (overwrite) + remove_file_or_dir(link_path); + + if (symlink(linked_path, link_path)) + elog(ERROR, "Could not create symbolic link \"%s\": %s", + link_path, strerror(errno)); +} + +/* Rename file */ +int fio_rename(char const* old_path, char const* new_path, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t old_path_len = strlen(old_path) + 1; + size_t new_path_len = strlen(new_path) + 1; + hdr.cop = FIO_RENAME; + hdr.handle = -1; + hdr.size = old_path_len + new_path_len; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, old_path, old_path_len), old_path_len); + IO_CHECK(fio_write_all(fio_stdout, new_path, new_path_len), new_path_len); + + //TODO: wait for confirmation. 
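+		/* The request is merely queued in the pipe: no reply is read, so
+		 * this call reports success even if the remote rename fails. */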
+ + return 0; + } + else + { + return rename(old_path, new_path); + } +} + +/* Sync file to disk */ +int fio_sync(char const* path, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + hdr.cop = FIO_SYNC; + hdr.handle = -1; + hdr.size = path_len; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.arg != 0) + { + errno = hdr.arg; + return -1; + } + + return 0; + } + else + { + int fd; + + fd = open(path, O_WRONLY | PG_BINARY, FILE_PERMISSIONS); + if (fd < 0) + return -1; + + if (fsync(fd) < 0) + { + close(fd); + return -1; + } + close(fd); + + return 0; + } +} + +/* Get crc32 of file */ +pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decompress) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(file_path) + 1; + pg_crc32 crc = 0; + hdr.cop = FIO_GET_CRC32; + hdr.handle = -1; + hdr.size = path_len; + hdr.arg = 0; + + if (decompress) + hdr.arg = 1; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, file_path, path_len), path_len); + IO_CHECK(fio_read_all(fio_stdin, &crc, sizeof(crc)), sizeof(crc)); + + return crc; + } + else + { + if (decompress) + return pgFileGetCRCgz(file_path, true, true); + else + return pgFileGetCRC(file_path, true, true); + } +} + +/* Remove file */ +int fio_unlink(char const* path, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + hdr.cop = FIO_UNLINK; + hdr.handle = -1; + hdr.size = path_len; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + + // TODO: error is swallowed ? 
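+		/* As with fio_rename() above, no reply is read, so a failure to
+		 * unlink the file on the remote side is not reported back. */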
+ return 0; + } + else + { + return remove_file_or_dir(path); + } +} + +/* Create directory */ +int fio_mkdir(char const* path, int mode, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + hdr.cop = FIO_MKDIR; + hdr.handle = -1; + hdr.size = path_len; + hdr.arg = mode; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + Assert(hdr.cop == FIO_MKDIR); + + return hdr.arg; + } + else + { + return dir_create_dir(path, mode); + } +} + +/* Change file mode */ +int fio_chmod(char const* path, int mode, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + size_t path_len = strlen(path) + 1; + hdr.cop = FIO_CHMOD; + hdr.handle = -1; + hdr.size = path_len; + hdr.arg = mode; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, path, path_len), path_len); + + return 0; + } + else + { + return chmod(path, mode); + } +} + +#ifdef HAVE_LIBZ + +#define ZLIB_BUFFER_SIZE (64*1024) +#define MAX_WBITS 15 /* 32K LZ77 window */ +#define DEF_MEM_LEVEL 8 +#define FIO_GZ_REMOTE_MARKER 1 + +typedef struct fioGZFile +{ + z_stream strm; + int fd; + int errnum; + bool compress; + bool eof; + Bytef buf[ZLIB_BUFFER_SIZE]; +} fioGZFile; + +/* On error returns NULL and errno should be checked */ +gzFile +fio_gzopen(char const* path, char const* mode, int level, fio_location location) +{ + int rc; + if (fio_is_remote(location)) + { + fioGZFile* gz = (fioGZFile*) pgut_malloc(sizeof(fioGZFile)); + memset(&gz->strm, 0, sizeof(gz->strm)); + gz->eof = 0; + gz->errnum = Z_OK; + /* check if file opened for writing */ + if (strcmp(mode, PG_BINARY_W) == 0) /* compress */ + { + gz->strm.next_out = gz->buf; + gz->strm.avail_out = ZLIB_BUFFER_SIZE; + rc = deflateInit2(&gz->strm, + level, + Z_DEFLATED, + MAX_WBITS + 16, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY); + if (rc == Z_OK) + { + gz->compress = 1; + gz->fd = fio_open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, location); + if (gz->fd < 0) + { + free(gz); + return NULL; + } + } + } + else + { + gz->strm.next_in = gz->buf; + gz->strm.avail_in = ZLIB_BUFFER_SIZE; + rc = inflateInit2(&gz->strm, 15 + 16); + gz->strm.avail_in = 0; + if (rc == Z_OK) + { + gz->compress = 0; + gz->fd = fio_open(path, O_RDONLY | PG_BINARY, location); + if (gz->fd < 0) + { + free(gz); + return NULL; + } + } + } + if (rc != Z_OK) + { + elog(ERROR, "zlib internal error when opening file %s: %s", + path, gz->strm.msg); + } + return (gzFile)((size_t)gz + FIO_GZ_REMOTE_MARKER); + } + else + { + gzFile file; + /* check if file opened for writing */ + if (strcmp(mode, PG_BINARY_W) == 0) + { + int fd = open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, FILE_PERMISSIONS); + if (fd < 0) + return NULL; + file = gzdopen(fd, mode); + } + else + file = gzopen(path, mode); + if (file != NULL && level != Z_DEFAULT_COMPRESSION) + { + if (gzsetparams(file, level, Z_DEFAULT_STRATEGY) != Z_OK) + elog(ERROR, "Cannot set compression level %d: %s", + level, strerror(errno)); + } + return file; + } +} + +int +fio_gzread(gzFile f, void *buf, unsigned size) +{ + if ((size_t)f & FIO_GZ_REMOTE_MARKER) + { + int rc; + fioGZFile* gz = (fioGZFile*)((size_t)f - FIO_GZ_REMOTE_MARKER); + + if (gz->eof) + { + return 0; + } + + gz->strm.next_out = (Bytef *)buf; + gz->strm.avail_out = size; + + while (1) + { + if (gz->strm.avail_in != 0) 
/* If there is some data in receiver buffer, then decompress it */ + { + rc = inflate(&gz->strm, Z_NO_FLUSH); + if (rc == Z_STREAM_END) + { + gz->eof = 1; + } + else if (rc != Z_OK) + { + gz->errnum = rc; + return -1; + } + if (gz->strm.avail_out != size) + { + return size - gz->strm.avail_out; + } + if (gz->strm.avail_in == 0) + { + gz->strm.next_in = gz->buf; + } + } + else + { + gz->strm.next_in = gz->buf; + } + rc = fio_read(gz->fd, gz->strm.next_in + gz->strm.avail_in, + gz->buf + ZLIB_BUFFER_SIZE - gz->strm.next_in - gz->strm.avail_in); + if (rc > 0) + { + gz->strm.avail_in += rc; + } + else + { + if (rc == 0) + { + gz->eof = 1; + } + return rc; + } + } + } + else + { + return gzread(f, buf, size); + } +} + +int +fio_gzwrite(gzFile f, void const* buf, unsigned size) +{ + if ((size_t)f & FIO_GZ_REMOTE_MARKER) + { + int rc; + fioGZFile* gz = (fioGZFile*)((size_t)f - FIO_GZ_REMOTE_MARKER); + + gz->strm.next_in = (Bytef *)buf; + gz->strm.avail_in = size; + + do + { + if (gz->strm.avail_out == ZLIB_BUFFER_SIZE) /* Compress buffer is empty */ + { + gz->strm.next_out = gz->buf; /* Reset pointer to the beginning of buffer */ + + if (gz->strm.avail_in != 0) /* Has something in input buffer */ + { + rc = deflate(&gz->strm, Z_NO_FLUSH); + Assert(rc == Z_OK); + gz->strm.next_out = gz->buf; /* Reset pointer to the beginning of buffer */ + } + else + { + break; + } + } + rc = fio_write(gz->fd, gz->strm.next_out, ZLIB_BUFFER_SIZE - gz->strm.avail_out); + if (rc >= 0) + { + gz->strm.next_out += rc; + gz->strm.avail_out += rc; + } + else + { + return rc; + } + } while (gz->strm.avail_out != ZLIB_BUFFER_SIZE || gz->strm.avail_in != 0); + + return size; + } + else + { + return gzwrite(f, buf, size); + } +} + +int +fio_gzclose(gzFile f) +{ + if ((size_t)f & FIO_GZ_REMOTE_MARKER) + { + fioGZFile* gz = (fioGZFile*)((size_t)f - FIO_GZ_REMOTE_MARKER); + int rc; + if (gz->compress) + { + gz->strm.next_out = gz->buf; + rc = deflate(&gz->strm, Z_FINISH); + Assert(rc == Z_STREAM_END && gz->strm.avail_out != ZLIB_BUFFER_SIZE); + deflateEnd(&gz->strm); + rc = fio_write(gz->fd, gz->buf, ZLIB_BUFFER_SIZE - gz->strm.avail_out); + if (rc != ZLIB_BUFFER_SIZE - gz->strm.avail_out) + { + return -1; + } + } + else + { + inflateEnd(&gz->strm); + } + rc = fio_close(gz->fd); + free(gz); + return rc; + } + else + { + return gzclose(f); + } +} + +int fio_gzeof(gzFile f) +{ + if ((size_t)f & FIO_GZ_REMOTE_MARKER) + { + fioGZFile* gz = (fioGZFile*)((size_t)f - FIO_GZ_REMOTE_MARKER); + return gz->eof; + } + else + { + return gzeof(f); + } +} + +const char* fio_gzerror(gzFile f, int *errnum) +{ + if ((size_t)f & FIO_GZ_REMOTE_MARKER) + { + fioGZFile* gz = (fioGZFile*)((size_t)f - FIO_GZ_REMOTE_MARKER); + if (errnum) + *errnum = gz->errnum; + return gz->strm.msg; + } + else + { + return gzerror(f, errnum); + } +} + +z_off_t fio_gzseek(gzFile f, z_off_t offset, int whence) +{ + Assert(!((size_t)f & FIO_GZ_REMOTE_MARKER)); + return gzseek(f, offset, whence); +} + + +#endif + +/* Send file content + * Note: it should not be used for large files. 
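+ * The whole file is read into a single pgut_malloc'd buffer and sent back
+ * to the requester as one FIO_SEND message, so memory usage is
+ * proportional to the file size; if the file cannot be opened, a header
+ * with size 0 is sent instead.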
+ */ +static void fio_load_file(int out, char const* path) +{ + int fd = open(path, O_RDONLY); + fio_header hdr; + void* buf = NULL; + + hdr.cop = FIO_SEND; + hdr.size = 0; + + if (fd >= 0) + { + off_t size = lseek(fd, 0, SEEK_END); + buf = pgut_malloc(size); + lseek(fd, 0, SEEK_SET); + IO_CHECK(fio_read_all(fd, buf, size), size); + hdr.size = size; + SYS_CHECK(close(fd)); + } + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (buf) + { + IO_CHECK(fio_write_all(out, buf, hdr.size), hdr.size); + free(buf); + } +} + +/* + * Return number of actually(!) readed blocks, attempts or + * half-readed block are not counted. + * Return values in case of error: + * FILE_MISSING + * OPEN_FAILED + * READ_ERROR + * PAGE_CORRUPTION + * WRITE_FAILED + * + * If none of the above, this function return number of blocks + * readed by remote agent. + * + * In case of DELTA mode horizonLsn must be a valid lsn, + * otherwise it should be set to InvalidXLogRecPtr. + */ +int fio_send_pages(const char *to_fullpath, const char *from_fullpath, pgFile *file, + XLogRecPtr horizonLsn, int calg, int clevel, uint32 checksum_version, + bool use_pagemap, BlockNumber* err_blknum, char **errormsg, + BackupPageHeader2 **headers) +{ + FILE *out = NULL; + char *out_buf = NULL; + struct { + fio_header hdr; + fio_send_request arg; + } req; + BlockNumber n_blocks_read = 0; + BlockNumber blknum = 0; + + /* send message with header + + 8bytes 24bytes var var + -------------------------------------------------------------- + | fio_header | fio_send_request | FILE PATH | BITMAP(if any) | + -------------------------------------------------------------- + */ + + req.hdr.cop = FIO_SEND_PAGES; + + if (use_pagemap) + { + req.hdr.size = sizeof(fio_send_request) + (*file).pagemap.bitmapsize + strlen(from_fullpath) + 1; + req.arg.bitmapsize = (*file).pagemap.bitmapsize; + + /* TODO: add optimization for the case of pagemap + * containing small number of blocks with big serial numbers: + * https://github.com/postgrespro/pg_probackup/blob/remote_page_backup/src/utils/file.c#L1211 + */ + } + else + { + req.hdr.size = sizeof(fio_send_request) + strlen(from_fullpath) + 1; + req.arg.bitmapsize = 0; + } + + req.arg.nblocks = file->size/BLCKSZ; + req.arg.segmentno = file->segno * RELSEG_SIZE; + req.arg.horizonLsn = horizonLsn; + req.arg.checksumVersion = checksum_version; + req.arg.calg = calg; + req.arg.clevel = clevel; + req.arg.path_len = strlen(from_fullpath) + 1; + + file->compress_alg = calg; /* TODO: wtf? why here? 
*/ + +//<----- +// datapagemap_iterator_t *iter; +// BlockNumber blkno; +// iter = datapagemap_iterate(pagemap); +// while (datapagemap_next(iter, &blkno)) +// elog(INFO, "block %u", blkno); +// pg_free(iter); +//<----- + + /* send header */ + IO_CHECK(fio_write_all(fio_stdout, &req, sizeof(req)), sizeof(req)); + + /* send file path */ + IO_CHECK(fio_write_all(fio_stdout, from_fullpath, req.arg.path_len), req.arg.path_len); + + /* send pagemap if any */ + if (use_pagemap) + IO_CHECK(fio_write_all(fio_stdout, (*file).pagemap.bitmap, (*file).pagemap.bitmapsize), (*file).pagemap.bitmapsize); + + while (true) + { + fio_header hdr; + char buf[BLCKSZ + sizeof(BackupPageHeader)]; + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (interrupted) + elog(ERROR, "Interrupted during page reading"); + + if (hdr.cop == FIO_ERROR) + { + /* FILE_MISSING, OPEN_FAILED and READ_FAILED */ + if (hdr.size > 0) + { + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + *errormsg = pgut_malloc(hdr.size); + snprintf(*errormsg, hdr.size, "%s", buf); + } + + return hdr.arg; + } + else if (hdr.cop == FIO_SEND_FILE_CORRUPTION) + { + *err_blknum = hdr.arg; + + if (hdr.size > 0) + { + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + *errormsg = pgut_malloc(hdr.size); + snprintf(*errormsg, hdr.size, "%s", buf); + } + return PAGE_CORRUPTION; + } + else if (hdr.cop == FIO_SEND_FILE_EOF) + { + /* n_blocks_read reported by EOF */ + n_blocks_read = hdr.arg; + + /* receive headers if any */ + if (hdr.size > 0) + { + *headers = pgut_malloc(hdr.size); + IO_CHECK(fio_read_all(fio_stdin, *headers, hdr.size), hdr.size); + file->n_headers = (hdr.size / sizeof(BackupPageHeader2)) -1; + } + + break; + } + else if (hdr.cop == FIO_PAGE) + { + blknum = hdr.arg; + + Assert(hdr.size <= sizeof(buf)); + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + + COMP_FILE_CRC32(true, file->crc, buf, hdr.size); + + /* lazily open backup file */ + if (!out) + out = open_local_file_rw(to_fullpath, &out_buf, STDIO_BUFSIZE); + + if (fio_fwrite(out, buf, hdr.size) != hdr.size) + { + fio_fclose(out); + *err_blknum = blknum; + return WRITE_FAILED; + } + file->write_size += hdr.size; + file->uncompressed_size += BLCKSZ; + } + else + elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop); + } + + if (out) + fclose(out); + pg_free(out_buf); + + return n_blocks_read; +} + +/* TODO: read file using large buffer + * Return codes: + * FIO_ERROR: + * FILE_MISSING (-1) + * OPEN_FAILED (-2) + * READ_FAILED (-3) + + * FIO_SEND_FILE_CORRUPTION + * FIO_SEND_FILE_EOF + */ +static void fio_send_pages_impl(int out, char* buf) +{ + FILE *in = NULL; + BlockNumber blknum = 0; + int current_pos = 0; + BlockNumber n_blocks_read = 0; + PageState page_st; + char read_buffer[BLCKSZ+1]; + char in_buf[STDIO_BUFSIZE]; + fio_header hdr; + fio_send_request *req = (fio_send_request*) buf; + char *from_fullpath = (char*) buf + sizeof(fio_send_request); + bool with_pagemap = req->bitmapsize > 0 ? true : false; + /* error reporting */ + char *errormsg = NULL; + /* parse buffer */ + datapagemap_t *map = NULL; + datapagemap_iterator_t *iter = NULL; + /* page headers */ + int32 hdr_num = -1; + int32 cur_pos_out = 0; + BackupPageHeader2 *headers = NULL; + + /* open source file */ + in = fopen(from_fullpath, PG_BINARY_R); + if (!in) + { + hdr.cop = FIO_ERROR; + + /* do not send exact wording of ENOENT error message + * because it is a very common error in our case, so + * error code is enough. 
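+	 * (fio_send_file_impl below follows the same convention.)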
+ */ + if (errno == ENOENT) + { + hdr.arg = FILE_MISSING; + hdr.size = 0; + } + else + { + hdr.arg = OPEN_FAILED; + errormsg = pgut_malloc(ERRMSG_MAX_LEN); + /* Construct the error message */ + snprintf(errormsg, ERRMSG_MAX_LEN, "Cannot open file \"%s\": %s", + from_fullpath, strerror(errno)); + hdr.size = strlen(errormsg) + 1; + } + + /* send header and message */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (errormsg) + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + + goto cleanup; + } + + if (with_pagemap) + { + map = pgut_malloc(sizeof(datapagemap_t)); + map->bitmapsize = req->bitmapsize; + map->bitmap = (char*) buf + sizeof(fio_send_request) + req->path_len; + + /* get first block */ + iter = datapagemap_iterate(map); + datapagemap_next(iter, &blknum); + + setvbuf(in, NULL, _IONBF, BUFSIZ); + } + else + setvbuf(in, in_buf, _IOFBF, STDIO_BUFSIZE); + + /* TODO: what is this barrier for? */ + read_buffer[BLCKSZ] = 1; /* barrier */ + + while (blknum < req->nblocks) + { + int rc = 0; + size_t read_len = 0; + int retry_attempts = PAGE_READ_ATTEMPTS; + + /* TODO: handle signals on the agent */ + if (interrupted) + elog(ERROR, "Interrupted during remote page reading"); + + /* read page, check header and validate checksumms */ + for (;;) + { + /* + * Optimize stdio buffer usage, fseek only when current position + * does not match the position of requested block. + */ + if (current_pos != blknum*BLCKSZ) + { + current_pos = blknum*BLCKSZ; + if (fseek(in, current_pos, SEEK_SET) != 0) + elog(ERROR, "fseek to position %u is failed on remote file '%s': %s", + current_pos, from_fullpath, strerror(errno)); + } + + read_len = fread(read_buffer, 1, BLCKSZ, in); + + current_pos += read_len; + + /* report error */ + if (ferror(in)) + { + hdr.cop = FIO_ERROR; + hdr.arg = READ_FAILED; + + errormsg = pgut_malloc(ERRMSG_MAX_LEN); + /* Construct the error message */ + snprintf(errormsg, ERRMSG_MAX_LEN, "Cannot read block %u of '%s': %s", + blknum, from_fullpath, strerror(errno)); + hdr.size = strlen(errormsg) + 1; + + /* send header and message */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + goto cleanup; + } + + if (read_len == BLCKSZ) + { + rc = validate_one_page(read_buffer, req->segmentno + blknum, + InvalidXLogRecPtr, &page_st, + req->checksumVersion); + + /* TODO: optimize copy of zeroed page */ + if (rc == PAGE_IS_ZEROED) + break; + else if (rc == PAGE_IS_VALID) + break; + } + + if (feof(in)) + goto eof; +// else /* readed less than BLKSZ bytes, retry */ + + /* File is either has insane header or invalid checksum, + * retry. If retry attempts are exhausted, report corruption. + */ + if (--retry_attempts == 0) + { + hdr.cop = FIO_SEND_FILE_CORRUPTION; + hdr.arg = blknum; + + /* Construct the error message */ + if (rc == PAGE_HEADER_IS_INVALID) + get_header_errormsg(read_buffer, &errormsg); + else if (rc == PAGE_CHECKSUM_MISMATCH) + get_checksum_errormsg(read_buffer, &errormsg, + req->segmentno + blknum); + + /* if error message is not empty, set payload size to its length */ + hdr.size = errormsg ? strlen(errormsg) + 1 : 0; + + /* send header */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + + /* send error message if any */ + if (errormsg) + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + + goto cleanup; + } + } + + n_blocks_read++; + + /* + * horizonLsn is not 0 only in case of delta backup. 
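+	 * For a DELTA backup only pages whose lsn is above horizonLsn (plus
+	 * zeroed pages) are sent; for FULL/PAGE/PTRACK backups horizonLsn is
+	 * InvalidXLogRecPtr, so every validated page is sent.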
+ * As far as unsigned number are always greater or equal than zero, + * there is no sense to add more checks. + */ + if ((req->horizonLsn == InvalidXLogRecPtr) || /* full, page, ptrack */ + (page_st.lsn == InvalidXLogRecPtr) || /* zeroed page */ + (req->horizonLsn > 0 && page_st.lsn > req->horizonLsn)) /* delta */ + { + int compressed_size = 0; + char write_buffer[BLCKSZ*2]; + BackupPageHeader* bph = (BackupPageHeader*)write_buffer; + + /* compress page */ + hdr.cop = FIO_PAGE; + hdr.arg = blknum; + + compressed_size = do_compress(write_buffer + sizeof(BackupPageHeader), + sizeof(write_buffer) - sizeof(BackupPageHeader), + read_buffer, BLCKSZ, req->calg, req->clevel, + NULL); + + if (compressed_size <= 0 || compressed_size >= BLCKSZ) + { + /* Do not compress page */ + memcpy(write_buffer + sizeof(BackupPageHeader), read_buffer, BLCKSZ); + compressed_size = BLCKSZ; + } + bph->block = blknum; + bph->compressed_size = compressed_size; + + hdr.size = compressed_size + sizeof(BackupPageHeader); + + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, write_buffer, hdr.size), hdr.size); + + /* set page header for this file */ + hdr_num++; + if (!headers) + headers = (BackupPageHeader2 *) pgut_malloc(sizeof(BackupPageHeader2)); + else + headers = (BackupPageHeader2 *) pgut_realloc(headers, (hdr_num+1) * sizeof(BackupPageHeader2)); + + headers[hdr_num].block = blknum; + headers[hdr_num].lsn = page_st.lsn; + headers[hdr_num].checksum = page_st.checksum; + headers[hdr_num].pos = cur_pos_out; + + cur_pos_out += hdr.size; + } + + /* next block */ + if (with_pagemap) + { + /* exit if pagemap is exhausted */ + if (!datapagemap_next(iter, &blknum)) + break; + } + else + blknum++; + } + +eof: + /* We are done, send eof */ + hdr.cop = FIO_SEND_FILE_EOF; + hdr.arg = n_blocks_read; + hdr.size = 0; + + if (headers) + { + hdr.size = (hdr_num+2) * sizeof(BackupPageHeader2); + + /* add dummy header */ + headers = (BackupPageHeader2 *) pgut_realloc(headers, (hdr_num+2) * sizeof(BackupPageHeader2)); + headers[hdr_num+1].pos = cur_pos_out; + } + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (headers) + IO_CHECK(fio_write_all(out, headers, hdr.size), hdr.size); + +cleanup: + pg_free(map); + pg_free(iter); + pg_free(errormsg); + pg_free(headers); + if (in) + fclose(in); + return; +} + +/* Receive chunks of compressed data, decompress them and write to + * destination file. 
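+ * The file is requested from the agent with a single FIO_SEND_FILE
+ * message and arrives as a stream of FIO_PAGE chunks terminated by
+ * FIO_SEND_FILE_EOF (or FIO_ERROR); each chunk is inflated into out_buf
+ * and written to "out".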
+ * Return codes: + * FILE_MISSING (-1) + * OPEN_FAILED (-2) + * READ_FAILED (-3) + * WRITE_FAILED (-4) + * ZLIB_ERROR (-5) + * REMOTE_ERROR (-6) + */ +int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, char **errormsg) +{ + fio_header hdr; + int exit_code = SEND_OK; + char *in_buf = pgut_malloc(CHUNK_SIZE); /* buffer for compressed data */ + char *out_buf = pgut_malloc(OUT_BUF_SIZE); /* 1MB buffer for decompressed data */ + size_t path_len = strlen(from_fullpath) + 1; + /* decompressor */ + z_stream *strm = NULL; + + hdr.cop = FIO_SEND_FILE; + hdr.size = path_len; + +// elog(VERBOSE, "Thread [%d]: Attempting to open remote compressed WAL file '%s'", +// thread_num, from_fullpath); + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, from_fullpath, path_len), path_len); + + for (;;) + { + fio_header hdr; + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.cop == FIO_SEND_FILE_EOF) + { + break; + } + else if (hdr.cop == FIO_ERROR) + { + /* handle error, reported by the agent */ + if (hdr.size > 0) + { + IO_CHECK(fio_read_all(fio_stdin, in_buf, hdr.size), hdr.size); + *errormsg = pgut_malloc(hdr.size); + snprintf(*errormsg, hdr.size, "%s", in_buf); + } + exit_code = hdr.arg; + goto cleanup; + } + else if (hdr.cop == FIO_PAGE) + { + int rc; + Assert(hdr.size <= CHUNK_SIZE); + IO_CHECK(fio_read_all(fio_stdin, in_buf, hdr.size), hdr.size); + + /* We have received a chunk of compressed data, lets decompress it */ + if (strm == NULL) + { + /* Initialize decompressor */ + strm = pgut_malloc(sizeof(z_stream)); + memset(strm, 0, sizeof(z_stream)); + + /* The fields next_in, avail_in initialized before init */ + strm->next_in = (Bytef *)in_buf; + strm->avail_in = hdr.size; + + rc = inflateInit2(strm, 15 + 16); + + if (rc != Z_OK) + { + *errormsg = pgut_malloc(ERRMSG_MAX_LEN); + snprintf(*errormsg, ERRMSG_MAX_LEN, + "Failed to initialize decompression stream for file '%s': %i: %s", + from_fullpath, rc, strm->msg); + exit_code = ZLIB_ERROR; + goto cleanup; + } + } + else + { + strm->next_in = (Bytef *)in_buf; + strm->avail_in = hdr.size; + } + + strm->next_out = (Bytef *)out_buf; /* output buffer */ + strm->avail_out = OUT_BUF_SIZE; /* free space in output buffer */ + + /* + * From zlib documentation: + * The application must update next_in and avail_in when avail_in + * has dropped to zero. It must update next_out and avail_out when + * avail_out has dropped to zero. 
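+			 * Here every received chunk is the whole input: inflate() is
+			 * called until avail_in is drained, and out_buf is flushed to
+			 * the destination file whenever avail_out drops to zero.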
+ */ + while (strm->avail_in != 0) /* while there is data in input buffer, decompress it */ + { + /* decompress until there is no data to decompress, + * or buffer with uncompressed data is full + */ + rc = inflate(strm, Z_NO_FLUSH); + if (rc == Z_STREAM_END) + /* end of stream */ + break; + else if (rc != Z_OK) + { + /* got an error */ + *errormsg = pgut_malloc(ERRMSG_MAX_LEN); + snprintf(*errormsg, ERRMSG_MAX_LEN, + "Decompression failed for file '%s': %i: %s", + from_fullpath, rc, strm->msg); + exit_code = ZLIB_ERROR; + goto cleanup; + } + + if (strm->avail_out == 0) + { + /* Output buffer is full, write it out */ + if (fwrite(out_buf, 1, OUT_BUF_SIZE, out) != OUT_BUF_SIZE) + { + exit_code = WRITE_FAILED; + goto cleanup; + } + + strm->next_out = (Bytef *)out_buf; /* output buffer */ + strm->avail_out = OUT_BUF_SIZE; + } + } + + /* write out leftovers if any */ + if (strm->avail_out != OUT_BUF_SIZE) + { + int len = OUT_BUF_SIZE - strm->avail_out; + + if (fwrite(out_buf, 1, len, out) != len) + { + exit_code = WRITE_FAILED; + goto cleanup; + } + } + } + else + elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop); + } + +cleanup: + if (exit_code < OPEN_FAILED) + fio_disconnect(); /* discard possible pending data in pipe */ + + if (strm) + { + inflateEnd(strm); + pg_free(strm); + } + + pg_free(in_buf); + pg_free(out_buf); + return exit_code; +} + +/* Receive chunks of data and write them to destination file. + * Return codes: + * SEND_OK (0) + * FILE_MISSING (-1) + * OPEN_FAILED (-2) + * READ_FAILED (-3) + * WRITE_FAILED (-4) + * + * OPEN_FAILED and READ_FAIL should also set errormsg. + * If pgFile is not NULL then we must calculate crc and read_size for it. + */ +int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out, + pgFile *file, char **errormsg) +{ + fio_header hdr; + int exit_code = SEND_OK; + size_t path_len = strlen(from_fullpath) + 1; + char *buf = pgut_malloc(CHUNK_SIZE); /* buffer */ + + hdr.cop = FIO_SEND_FILE; + hdr.size = path_len; + +// elog(VERBOSE, "Thread [%d]: Attempting to open remote WAL file '%s'", +// thread_num, from_fullpath); + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, from_fullpath, path_len), path_len); + + for (;;) + { + /* receive data */ + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.cop == FIO_SEND_FILE_EOF) + { + break; + } + else if (hdr.cop == FIO_ERROR) + { + /* handle error, reported by the agent */ + if (hdr.size > 0) + { + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + *errormsg = pgut_malloc(hdr.size); + snprintf(*errormsg, hdr.size, "%s", buf); + } + exit_code = hdr.arg; + break; + } + else if (hdr.cop == FIO_PAGE) + { + Assert(hdr.size <= CHUNK_SIZE); + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + + /* We have received a chunk of data data, lets write it out */ + if (fwrite(buf, 1, hdr.size, out) != hdr.size) + { + exit_code = WRITE_FAILED; + break; + } + + if (file) + { + file->read_size += hdr.size; + COMP_FILE_CRC32(true, file->crc, buf, hdr.size); + } + } + else + { + /* TODO: fio_disconnect may get assert fail when running after this */ + elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop); + } + } + + if (exit_code < OPEN_FAILED) + fio_disconnect(); /* discard possible pending data in pipe */ + + pg_free(buf); + return exit_code; +} + +/* Send file content + * On error we return FIO_ERROR message with following codes + * FIO_ERROR: + * 
FILE_MISSING (-1) + * OPEN_FAILED (-2) + * READ_FAILED (-3) + * + * FIO_PAGE + * FIO_SEND_FILE_EOF + * + */ +static void fio_send_file_impl(int out, char const* path) +{ + FILE *fp; + fio_header hdr; + char *buf = pgut_malloc(CHUNK_SIZE); + size_t read_len = 0; + char *errormsg = NULL; + + /* open source file for read */ + /* TODO: check that file is regular file */ + fp = fopen(path, PG_BINARY_R); + if (!fp) + { + hdr.cop = FIO_ERROR; + + /* do not send exact wording of ENOENT error message + * because it is a very common error in our case, so + * error code is enough. + */ + if (errno == ENOENT) + { + hdr.arg = FILE_MISSING; + hdr.size = 0; + } + else + { + hdr.arg = OPEN_FAILED; + errormsg = pgut_malloc(ERRMSG_MAX_LEN); + /* Construct the error message */ + snprintf(errormsg, ERRMSG_MAX_LEN, "Cannot open file '%s': %s", path, strerror(errno)); + hdr.size = strlen(errormsg) + 1; + } + + /* send header and message */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (errormsg) + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + + goto cleanup; + } + + /* disable stdio buffering */ + setvbuf(fp, NULL, _IONBF, BUFSIZ); + + /* copy content */ + for (;;) + { + read_len = fread(buf, 1, CHUNK_SIZE, fp); + + /* report error */ + if (ferror(fp)) + { + hdr.cop = FIO_ERROR; + errormsg = pgut_malloc(ERRMSG_MAX_LEN); + hdr.arg = READ_FAILED; + /* Construct the error message */ + snprintf(errormsg, ERRMSG_MAX_LEN, "Cannot read from file '%s': %s", path, strerror(errno)); + hdr.size = strlen(errormsg) + 1; + /* send header and message */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + + goto cleanup; + } + + if (read_len > 0) + { + /* send chunk */ + hdr.cop = FIO_PAGE; + hdr.size = read_len; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, buf, read_len), read_len); + } + + if (feof(fp)) + break; + } + + /* we are done, send eof */ + hdr.cop = FIO_SEND_FILE_EOF; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + +cleanup: + if (fp) + fclose(fp); + pg_free(buf); + pg_free(errormsg); + return; +} + +/* Compile the array of files located on remote machine in directory root */ +void fio_list_dir(parray *files, const char *root, bool exclude, + bool follow_symlink, bool add_root, bool backup_logs, + bool skip_hidden, int external_dir_num) +{ + fio_header hdr; + fio_list_dir_request req; + char *buf = pgut_malloc(CHUNK_SIZE); + + /* Send to the agent message with parameters for directory listing */ + snprintf(req.path, MAXPGPATH, "%s", root); + req.exclude = exclude; + req.follow_symlink = follow_symlink; + req.add_root = add_root; + req.backup_logs = backup_logs; + req.exclusive_backup = exclusive_backup; + req.skip_hidden = skip_hidden; + req.external_dir_num = external_dir_num; + + hdr.cop = FIO_LIST_DIR; + hdr.size = sizeof(req); + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, &req, hdr.size), hdr.size); + + for (;;) + { + /* receive data */ + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.cop == FIO_SEND_FILE_EOF) + { + /* the work is done */ + break; + } + else if (hdr.cop == FIO_SEND_FILE) + { + pgFile *file = NULL; + fio_pgFile fio_file; + + /* receive rel_path */ + IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size); + file = pgFileInit(buf); + + /* receive metainformation */ + IO_CHECK(fio_read_all(fio_stdin, &fio_file, 
sizeof(fio_file)), sizeof(fio_file));
+
+			file->mode = fio_file.mode;
+			file->size = fio_file.size;
+			file->mtime = fio_file.mtime;
+			file->is_datafile = fio_file.is_datafile;
+			file->is_database = fio_file.is_database;
+			file->tblspcOid = fio_file.tblspcOid;
+			file->dbOid = fio_file.dbOid;
+			file->relOid = fio_file.relOid;
+			file->forkName = fio_file.forkName;
+			file->segno = fio_file.segno;
+			file->external_dir_num = fio_file.external_dir_num;
+
+			if (fio_file.linked_len > 0)
+			{
+				IO_CHECK(fio_read_all(fio_stdin, buf, fio_file.linked_len), fio_file.linked_len);
+
+				file->linked = pgut_malloc(fio_file.linked_len);
+				snprintf(file->linked, fio_file.linked_len, "%s", buf);
+			}
+
+//			elog(INFO, "Received file: %s, mode: %u, size: %lu, mtime: %lu",
+//				file->rel_path, file->mode, file->size, file->mtime);
+
+			parray_append(files, file);
+		}
+		else
+		{
+			/* TODO: fio_disconnect may get assert fail when running after this */
+			elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop);
+		}
+	}
+
+	pg_free(buf);
+}
+
+
+/*
+ * To get the array of files we use the same function, dir_list_file(),
+ * that is used for local backup.
+ * After that we iterate over the array and for every file send at least
+ * two messages to the main process:
+ * 1. rel_path
+ * 2. metainformation (size, mtime, etc)
+ * 3. link path (optional)
+ *
+ * TODO: replace FIO_SEND_FILE and FIO_SEND_FILE_EOF with dedicated messages
+ */
+static void fio_list_dir_impl(int out, char* buf)
+{
+	int i;
+	fio_header hdr;
+	fio_list_dir_request *req = (fio_list_dir_request*) buf;
+	parray *file_files = parray_new();
+
+	/*
+	 * Disable logging to the console of all messages except ERROR messages,
+	 * because currently we have no mechanism to notify the main process
+	 * about the messages that have been sent.
+	 * TODO: correctly send elog messages from agent to main process.
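+	 *
+	 * One possible shape for that (purely an illustrative sketch, not
+	 * implemented here; FIO_LOG would be a new opcode) is to wrap every
+	 * log line into a dedicated protocol message:
+	 *
+	 *     hdr.cop  = FIO_LOG;
+	 *     hdr.arg  = elevel;
+	 *     hdr.size = strlen(message) + 1;
+	 *     fio_write_all(out, &hdr, sizeof(hdr));
+	 *     fio_write_all(out, message, hdr.size);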
+ */ + instance_config.logger.log_level_console = ERROR; + exclusive_backup = req->exclusive_backup; + + dir_list_file(file_files, req->path, req->exclude, req->follow_symlink, + req->add_root, req->backup_logs, req->skip_hidden, + req->external_dir_num, FIO_LOCAL_HOST); + + /* send information about files to the main process */ + for (i = 0; i < parray_num(file_files); i++) + { + fio_pgFile fio_file; + pgFile *file = (pgFile *) parray_get(file_files, i); + + fio_file.mode = file->mode; + fio_file.size = file->size; + fio_file.mtime = file->mtime; + fio_file.is_datafile = file->is_datafile; + fio_file.is_database = file->is_database; + fio_file.tblspcOid = file->tblspcOid; + fio_file.dbOid = file->dbOid; + fio_file.relOid = file->relOid; + fio_file.forkName = file->forkName; + fio_file.segno = file->segno; + fio_file.external_dir_num = file->external_dir_num; + + if (file->linked) + fio_file.linked_len = strlen(file->linked) + 1; + else + fio_file.linked_len = 0; + + hdr.cop = FIO_SEND_FILE; + hdr.size = strlen(file->rel_path) + 1; + + /* send rel_path first */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, file->rel_path, hdr.size), hdr.size); + + /* now send file metainformation */ + IO_CHECK(fio_write_all(out, &fio_file, sizeof(fio_file)), sizeof(fio_file)); + + /* If file is a symlink, then send link path */ + if (file->linked) + IO_CHECK(fio_write_all(out, file->linked, fio_file.linked_len), fio_file.linked_len); + + pgFileFree(file); + } + + parray_free(file_files); + hdr.cop = FIO_SEND_FILE_EOF; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); +} + +PageState * +fio_get_checksum_map(const char *fullpath, uint32 checksum_version, int n_blocks, + XLogRecPtr dest_stop_lsn, BlockNumber segmentno, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + fio_checksum_map_request req_hdr; + PageState *checksum_map = NULL; + size_t path_len = strlen(fullpath) + 1; + + req_hdr.n_blocks = n_blocks; + req_hdr.segmentno = segmentno; + req_hdr.stop_lsn = dest_stop_lsn; + req_hdr.checksumVersion = checksum_version; + + hdr.cop = FIO_GET_CHECKSUM_MAP; + hdr.size = sizeof(req_hdr) + path_len; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, &req_hdr, sizeof(req_hdr)), sizeof(req_hdr)); + IO_CHECK(fio_write_all(fio_stdout, fullpath, path_len), path_len); + + /* receive data */ + IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr)); + + if (hdr.size > 0) + { + checksum_map = pgut_malloc(n_blocks * sizeof(PageState)); + memset(checksum_map, 0, n_blocks * sizeof(PageState)); + IO_CHECK(fio_read_all(fio_stdin, checksum_map, hdr.size * sizeof(PageState)), hdr.size * sizeof(PageState)); + } + + return checksum_map; + } + else + { + + return get_checksum_map(fullpath, checksum_version, + n_blocks, dest_stop_lsn, segmentno); + } +} + +static void fio_get_checksum_map_impl(int out, char *buf) +{ + fio_header hdr; + PageState *checksum_map = NULL; + char *fullpath = (char*) buf + sizeof(fio_checksum_map_request); + fio_checksum_map_request *req = (fio_checksum_map_request*) buf; + + checksum_map = get_checksum_map(fullpath, req->checksumVersion, + req->n_blocks, req->stop_lsn, req->segmentno); + hdr.size = req->n_blocks; + + /* send array of PageState`s to main process */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (hdr.size > 0) + IO_CHECK(fio_write_all(out, checksum_map, hdr.size * sizeof(PageState)), hdr.size * 
sizeof(PageState));
+
+	pg_free(checksum_map);
+}
+
+datapagemap_t *
+fio_get_lsn_map(const char *fullpath, uint32 checksum_version,
+				int n_blocks, XLogRecPtr shift_lsn, BlockNumber segmentno,
+				fio_location location)
+{
+	datapagemap_t* lsn_map = NULL;
+
+	if (fio_is_remote(location))
+	{
+		fio_header hdr;
+		fio_lsn_map_request req_hdr;
+		size_t path_len = strlen(fullpath) + 1;
+
+		req_hdr.n_blocks = n_blocks;
+		req_hdr.segmentno = segmentno;
+		req_hdr.shift_lsn = shift_lsn;
+		req_hdr.checksumVersion = checksum_version;
+
+		hdr.cop = FIO_GET_LSN_MAP;
+		hdr.size = sizeof(req_hdr) + path_len;
+
+		IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
+		IO_CHECK(fio_write_all(fio_stdout, &req_hdr, sizeof(req_hdr)), sizeof(req_hdr));
+		IO_CHECK(fio_write_all(fio_stdout, fullpath, path_len), path_len);
+
+		/* receive data */
+		IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
+
+		if (hdr.size > 0)
+		{
+			lsn_map = pgut_malloc(sizeof(datapagemap_t));
+			memset(lsn_map, 0, sizeof(datapagemap_t));
+
+			lsn_map->bitmap = pgut_malloc(hdr.size);
+			lsn_map->bitmapsize = hdr.size;
+
+			IO_CHECK(fio_read_all(fio_stdin, lsn_map->bitmap, hdr.size), hdr.size);
+		}
+	}
+	else
+	{
+		lsn_map = get_lsn_map(fullpath, checksum_version, n_blocks,
+							  shift_lsn, segmentno);
+	}
+
+	return lsn_map;
+}
+
+static void fio_get_lsn_map_impl(int out, char *buf)
+{
+	fio_header hdr;
+	datapagemap_t *lsn_map = NULL;
+	char *fullpath = (char*) buf + sizeof(fio_lsn_map_request);
+	fio_lsn_map_request *req = (fio_lsn_map_request*) buf;
+
+	lsn_map = get_lsn_map(fullpath, req->checksumVersion, req->n_blocks,
+						  req->shift_lsn, req->segmentno);
+	if (lsn_map)
+		hdr.size = lsn_map->bitmapsize;
+	else
+		hdr.size = 0;
+
+	/* send bitmap to main process */
+	IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+	if (hdr.size > 0)
+		IO_CHECK(fio_write_all(out, lsn_map->bitmap, hdr.size), hdr.size);
+
+	if (lsn_map)
+	{
+		pg_free(lsn_map->bitmap);
+		pg_free(lsn_map);
+	}
+}
+
+/*
+ * Go to the remote host, read the postmaster pid from the postmaster.pid file
+ * and check that the process is running. If it is running, return its pid.
+ */
+pid_t fio_check_postmaster(const char *pgdata, fio_location location)
+{
+	if (fio_is_remote(location))
+	{
+		fio_header hdr;
+
+		hdr.cop = FIO_CHECK_POSTMASTER;
+		hdr.size = strlen(pgdata) + 1;
+
+		IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
+		IO_CHECK(fio_write_all(fio_stdout, pgdata, hdr.size), hdr.size);
+
+		/* receive result */
+		IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
+		return hdr.arg;
+	}
+	else
+		return check_postmaster(pgdata);
+}
+
+static void fio_check_postmaster_impl(int out, char *buf)
+{
+	fio_header hdr;
+	pid_t  postmaster_pid;
+	char  *pgdata = (char*) buf;
+
+	postmaster_pid = check_postmaster(pgdata);
+
+	/* send the postmaster pid to the main process */
+	hdr.arg = postmaster_pid;
+	IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
+}
+
+/*
+ * Delete the file pointed to by the pgFile.
+ * If the pgFile points to a directory, the directory must be empty.
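+ *
+ * Illustrative call (a sketch only; "file" and "to_fullpath" stand for the
+ * pgFile entry and path the caller is working with):
+ *
+ *     fio_delete(file->mode, to_fullpath, FIO_BACKUP_HOST);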
+ */ +void +fio_delete(mode_t mode, const char *fullpath, fio_location location) +{ + if (fio_is_remote(location)) + { + fio_header hdr; + + hdr.cop = FIO_DELETE; + hdr.size = strlen(fullpath) + 1; + hdr.arg = mode; + + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, fullpath, hdr.size), hdr.size); + + } + else + pgFileDelete(mode, fullpath); +} + +static void +fio_delete_impl(mode_t mode, char *buf) +{ + char *fullpath = (char*) buf; + + pgFileDelete(mode, fullpath); +} + +/* Execute commands at remote host */ +void fio_communicate(int in, int out) +{ + /* + * Map of file and directory descriptors. + * The same mapping is used in agent and master process, so we + * can use the same index at both sides. + */ + int fd[FIO_FDMAX]; + DIR* dir[FIO_FDMAX]; + struct dirent* entry; + size_t buf_size = 128*1024; + char* buf = (char*)pgut_malloc(buf_size); + fio_header hdr; + struct stat st; + int rc; + int tmp_fd; + pg_crc32 crc; + +#ifdef WIN32 + SYS_CHECK(setmode(in, _O_BINARY)); + SYS_CHECK(setmode(out, _O_BINARY)); +#endif + + /* Main loop until end of processing all master commands */ + while ((rc = fio_read_all(in, &hdr, sizeof hdr)) == sizeof(hdr)) { + if (hdr.size != 0) { + if (hdr.size > buf_size) { + /* Extend buffer on demand */ + buf_size = hdr.size; + buf = (char*)realloc(buf, buf_size); + } + IO_CHECK(fio_read_all(in, buf, hdr.size), hdr.size); + } + switch (hdr.cop) { + case FIO_LOAD: /* Send file content */ + fio_load_file(out, buf); + break; + case FIO_OPENDIR: /* Open directory for traversal */ + dir[hdr.handle] = opendir(buf); + hdr.arg = dir[hdr.handle] == NULL ? errno : 0; + hdr.size = 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + break; + case FIO_READDIR: /* Get next directory entry */ + hdr.cop = FIO_SEND; + entry = readdir(dir[hdr.handle]); + if (entry != NULL) + { + hdr.size = sizeof(*entry); + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, entry, hdr.size), hdr.size); + } + else + { + hdr.size = 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + } + break; + case FIO_CLOSEDIR: /* Finish directory traversal */ + SYS_CHECK(closedir(dir[hdr.handle])); + break; + case FIO_OPEN: /* Open file */ + fd[hdr.handle] = open(buf, hdr.arg, FILE_PERMISSIONS); + hdr.arg = fd[hdr.handle] < 0 ? errno : 0; + hdr.size = 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + break; + case FIO_CLOSE: /* Close file */ + SYS_CHECK(close(fd[hdr.handle])); + break; + case FIO_WRITE: /* Write to the current position in file */ + IO_CHECK(fio_write_all(fd[hdr.handle], buf, hdr.size), hdr.size); + break; + case FIO_WRITE_COMPRESSED: /* Write to the current position in file */ + IO_CHECK(fio_write_compressed_impl(fd[hdr.handle], buf, hdr.size, hdr.arg), BLCKSZ); + break; + case FIO_READ: /* Read from the current position in file */ + if ((size_t)hdr.arg > buf_size) { + buf_size = hdr.arg; + buf = (char*)realloc(buf, buf_size); + } + rc = read(fd[hdr.handle], buf, hdr.arg); + hdr.cop = FIO_SEND; + hdr.size = rc > 0 ? rc : 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (hdr.size != 0) + IO_CHECK(fio_write_all(out, buf, hdr.size), hdr.size); + break; + case FIO_PREAD: /* Read from specified position in file, ignoring pages beyond horizon of delta backup */ + rc = pread(fd[hdr.handle], buf, BLCKSZ, hdr.arg); + hdr.cop = FIO_SEND; + hdr.arg = rc; + hdr.size = rc >= 0 ? 
rc : 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (hdr.size != 0) + IO_CHECK(fio_write_all(out, buf, hdr.size), hdr.size); + break; + case FIO_AGENT_VERSION: + hdr.arg = AGENT_PROTOCOL_VERSION; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + break; + case FIO_STAT: /* Get information about file with specified path */ + hdr.size = sizeof(st); + rc = hdr.arg ? stat(buf, &st) : lstat(buf, &st); + hdr.arg = rc < 0 ? errno : 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(out, &st, sizeof(st)), sizeof(st)); + break; + case FIO_ACCESS: /* Check presence of file with specified name */ + hdr.size = 0; + hdr.arg = access(buf, hdr.arg) < 0 ? errno : 0; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + break; + case FIO_RENAME: /* Rename file */ + SYS_CHECK(rename(buf, buf + strlen(buf) + 1)); + break; + case FIO_SYMLINK: /* Create symbolic link */ + fio_symlink_impl(out, buf, hdr.arg > 0 ? true : false); + break; + case FIO_UNLINK: /* Remove file or directory (TODO: Win32) */ + SYS_CHECK(remove_file_or_dir(buf)); + break; + case FIO_MKDIR: /* Create directory */ + hdr.size = 0; + hdr.arg = dir_create_dir(buf, hdr.arg); + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + break; + case FIO_CHMOD: /* Change file mode */ + SYS_CHECK(chmod(buf, hdr.arg)); + break; + case FIO_SEEK: /* Set current position in file */ + SYS_CHECK(lseek(fd[hdr.handle], hdr.arg, SEEK_SET)); + break; + case FIO_TRUNCATE: /* Truncate file */ + SYS_CHECK(ftruncate(fd[hdr.handle], hdr.arg)); + break; + case FIO_LIST_DIR: + fio_list_dir_impl(out, buf); + break; + case FIO_SEND_PAGES: + // buf contain fio_send_request header and bitmap. + fio_send_pages_impl(out, buf); + break; + case FIO_SEND_FILE: + fio_send_file_impl(out, buf); + break; + case FIO_SYNC: + /* open file and fsync it */ + tmp_fd = open(buf, O_WRONLY | PG_BINARY, FILE_PERMISSIONS); + if (tmp_fd < 0) + hdr.arg = errno; + else + { + if (fsync(tmp_fd) == 0) + hdr.arg = 0; + else + hdr.arg = errno; + } + close(tmp_fd); + + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + break; + case FIO_GET_CRC32: + /* calculate crc32 for a file */ + if (hdr.arg == 1) + crc = pgFileGetCRCgz(buf, true, true); + else + crc = pgFileGetCRC(buf, true, true); + IO_CHECK(fio_write_all(out, &crc, sizeof(crc)), sizeof(crc)); + break; + case FIO_GET_CHECKSUM_MAP: + /* calculate crc32 for a file */ + fio_get_checksum_map_impl(out, buf); + break; + case FIO_GET_LSN_MAP: + /* calculate crc32 for a file */ + fio_get_lsn_map_impl(out, buf); + break; + case FIO_CHECK_POSTMASTER: + /* calculate crc32 for a file */ + fio_check_postmaster_impl(out, buf); + break; + case FIO_DELETE: + /* delete file */ + fio_delete_impl(hdr.arg, buf); + break; + case FIO_DISCONNECT: + hdr.cop = FIO_DISCONNECTED; + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + break; + default: + Assert(false); + } + } + free(buf); + if (rc != 0) { /* Not end of stream: normal pipe close */ + perror("read"); + exit(EXIT_FAILURE); + } +} + diff --git a/src/utils/file.h b/src/utils/file.h new file mode 100644 index 000000000..255512f1f --- /dev/null +++ b/src/utils/file.h @@ -0,0 +1,141 @@ +#ifndef __FILE__H__ +#define __FILE__H__ + +#include "storage/bufpage.h" +#include +#include +#include + +#ifdef HAVE_LIBZ +#include +#endif + +typedef enum +{ + /* message for compatibility check */ + FIO_AGENT_VERSION, /* never move this */ + FIO_OPEN, + FIO_CLOSE, + FIO_WRITE, + FIO_SYNC, + FIO_RENAME, 
+ FIO_SYMLINK, + FIO_UNLINK, + FIO_MKDIR, + FIO_CHMOD, + FIO_SEEK, + FIO_TRUNCATE, + FIO_DELETE, + FIO_PREAD, + FIO_READ, + FIO_LOAD, + FIO_STAT, + FIO_SEND, + FIO_ACCESS, + FIO_OPENDIR, + FIO_READDIR, + FIO_CLOSEDIR, + FIO_PAGE, + FIO_WRITE_COMPRESSED, + FIO_GET_CRC32, + /* used for incremental restore */ + FIO_GET_CHECKSUM_MAP, + FIO_GET_LSN_MAP, + /* used in fio_send_pages */ + FIO_SEND_PAGES, + FIO_ERROR, + FIO_SEND_FILE, +// FIO_CHUNK, + FIO_SEND_FILE_EOF, + FIO_SEND_FILE_CORRUPTION, + FIO_SEND_FILE_HEADERS, + /* messages for closing connection */ + FIO_DISCONNECT, + FIO_DISCONNECTED, + FIO_LIST_DIR, + FIO_CHECK_POSTMASTER +} fio_operations; + +typedef enum +{ + FIO_LOCAL_HOST, /* data is locate at local host */ + FIO_DB_HOST, /* data is located at Postgres server host */ + FIO_BACKUP_HOST, /* data is located at backup host */ + FIO_REMOTE_HOST /* date is located at remote host */ +} fio_location; + +#define FIO_FDMAX 64 +#define FIO_PIPE_MARKER 0x40000000 + +#define SYS_CHECK(cmd) do if ((cmd) < 0) { fprintf(stderr, "%s:%d: (%s) %s\n", __FILE__, __LINE__, #cmd, strerror(errno)); exit(EXIT_FAILURE); } while (0) +#define IO_CHECK(cmd, size) do { int _rc = (cmd); if (_rc != (size)) fio_error(_rc, size, __FILE__, __LINE__); } while (0) + +typedef struct +{ +// fio_operations cop; +// 16 + unsigned cop : 32; + unsigned handle : 32; + unsigned size : 32; + unsigned arg; +} fio_header; + +extern fio_location MyLocation; + +/* Check if FILE handle is local or remote (created by FIO) */ +#define fio_is_remote_file(file) ((size_t)(file) <= FIO_FDMAX) + +extern void fio_redirect(int in, int out, int err); +extern void fio_communicate(int in, int out); + +extern int fio_get_agent_version(void); +extern FILE* fio_fopen(char const* name, char const* mode, fio_location location); +extern size_t fio_fwrite(FILE* f, void const* buf, size_t size); +extern ssize_t fio_fwrite_compressed(FILE* f, void const* buf, size_t size, int compress_alg); +extern ssize_t fio_fread(FILE* f, void* buf, size_t size); +extern int fio_pread(FILE* f, void* buf, off_t offs); +extern int fio_fprintf(FILE* f, char const* arg, ...) 
pg_attribute_printf(2, 3); +extern int fio_fflush(FILE* f); +extern int fio_fseek(FILE* f, off_t offs); +extern int fio_ftruncate(FILE* f, off_t size); +extern int fio_fclose(FILE* f); +extern int fio_ffstat(FILE* f, struct stat* st); +extern void fio_error(int rc, int size, char const* file, int line); + +extern int fio_open(char const* name, int mode, fio_location location); +extern ssize_t fio_write(int fd, void const* buf, size_t size); +extern ssize_t fio_read(int fd, void* buf, size_t size); +extern int fio_flush(int fd); +extern int fio_seek(int fd, off_t offs); +extern int fio_fstat(int fd, struct stat* st); +extern int fio_truncate(int fd, off_t size); +extern int fio_close(int fd); +extern void fio_disconnect(void); +extern int fio_sync(char const* path, fio_location location); +extern pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decompress); + +extern int fio_rename(char const* old_path, char const* new_path, fio_location location); +extern int fio_symlink(char const* target, char const* link_path, bool overwrite, fio_location location); +extern int fio_unlink(char const* path, fio_location location); +extern int fio_mkdir(char const* path, int mode, fio_location location); +extern int fio_chmod(char const* path, int mode, fio_location location); +extern int fio_access(char const* path, int mode, fio_location location); +extern int fio_stat(char const* path, struct stat* st, bool follow_symlinks, fio_location location); +extern DIR* fio_opendir(char const* path, fio_location location); +extern struct dirent * fio_readdir(DIR *dirp); +extern int fio_closedir(DIR *dirp); +extern FILE* fio_open_stream(char const* name, fio_location location); +extern int fio_close_stream(FILE* f); + +#ifdef HAVE_LIBZ +extern gzFile fio_gzopen(char const* path, char const* mode, int level, fio_location location); +extern int fio_gzclose(gzFile file); +extern int fio_gzread(gzFile f, void *buf, unsigned size); +extern int fio_gzwrite(gzFile f, void const* buf, unsigned size); +extern int fio_gzeof(gzFile f); +extern z_off_t fio_gzseek(gzFile f, z_off_t offset, int whence); +extern const char* fio_gzerror(gzFile file, int *errnum); +#endif + +#endif + diff --git a/src/utils/json.c b/src/utils/json.c index 3afbe9e70..9f13a958f 100644 --- a/src/utils/json.c +++ b/src/utils/json.c @@ -2,7 +2,7 @@ * * json.c: - make json document. * - * Copyright (c) 2018, Postgres Professional + * Copyright (c) 2018-2019, Postgres Professional * *------------------------------------------------------------------------- */ @@ -12,6 +12,8 @@ static void json_add_indent(PQExpBuffer buf, int32 level); static void json_add_escaped(PQExpBuffer buf, const char *str); +static bool add_comma = false; + /* * Start or end json token. Currently it is a json object or array. 
* @@ -25,6 +27,7 @@ json_add(PQExpBuffer buf, JsonToken type, int32 *level) case JT_BEGIN_ARRAY: appendPQExpBufferChar(buf, '['); *level += 1; + add_comma = false; break; case JT_END_ARRAY: *level -= 1; @@ -33,11 +36,13 @@ json_add(PQExpBuffer buf, JsonToken type, int32 *level) else json_add_indent(buf, *level); appendPQExpBufferChar(buf, ']'); + add_comma = true; break; case JT_BEGIN_OBJECT: json_add_indent(buf, *level); appendPQExpBufferChar(buf, '{'); *level += 1; + add_comma = false; break; case JT_END_OBJECT: *level -= 1; @@ -46,6 +51,7 @@ json_add(PQExpBuffer buf, JsonToken type, int32 *level) else json_add_indent(buf, *level); appendPQExpBufferChar(buf, '}'); + add_comma = true; break; default: break; @@ -56,7 +62,7 @@ json_add(PQExpBuffer buf, JsonToken type, int32 *level) * Add json object's key. If it isn't first key we need to add a comma. */ void -json_add_key(PQExpBuffer buf, const char *name, int32 level, bool add_comma) +json_add_key(PQExpBuffer buf, const char *name, int32 level) { if (add_comma) appendPQExpBufferChar(buf, ','); @@ -64,6 +70,8 @@ json_add_key(PQExpBuffer buf, const char *name, int32 level, bool add_comma) json_add_escaped(buf, name); appendPQExpBufferStr(buf, ": "); + + add_comma = true; } /* @@ -72,10 +80,14 @@ json_add_key(PQExpBuffer buf, const char *name, int32 level, bool add_comma) */ void json_add_value(PQExpBuffer buf, const char *name, const char *value, - int32 level, bool add_comma) + int32 level, bool escaped) { - json_add_key(buf, name, level, add_comma); - json_add_escaped(buf, value); + json_add_key(buf, name, level); + + if (escaped) + json_add_escaped(buf, value); + else + appendPQExpBufferStr(buf, value); } static void diff --git a/src/utils/json.h b/src/utils/json.h index cf5a70648..cc9f1168d 100644 --- a/src/utils/json.h +++ b/src/utils/json.h @@ -2,7 +2,7 @@ * * json.h: - prototypes of json output functions. * - * Copyright (c) 2018, Postgres Professional + * Copyright (c) 2018-2019, Postgres Professional * *------------------------------------------------------------------------- */ @@ -25,9 +25,8 @@ typedef enum } JsonToken; extern void json_add(PQExpBuffer buf, JsonToken type, int32 *level); -extern void json_add_key(PQExpBuffer buf, const char *name, int32 level, - bool add_comma); +extern void json_add_key(PQExpBuffer buf, const char *name, int32 level); extern void json_add_value(PQExpBuffer buf, const char *name, const char *value, - int32 level, bool add_comma); + int32 level, bool escaped); #endif /* PROBACKUP_JSON_H */ diff --git a/src/utils/logger.c b/src/utils/logger.c index 88ee9b6b2..5aee41b46 100644 --- a/src/utils/logger.c +++ b/src/utils/logger.c @@ -2,40 +2,32 @@ * * logger.c: - log events into log file or stderr. * - * Copyright (c) 2017-2017, Postgres Professional + * Copyright (c) 2017-2019, Postgres Professional * *------------------------------------------------------------------------- */ -#include -#include -#include +#include "postgres_fe.h" + #include -#include +#include "pg_probackup.h" #include "logger.h" #include "pgut.h" -#include "pg_probackup.h" #include "thread.h" +#include -/* Logger parameters */ - -int log_level_console = LOG_LEVEL_CONSOLE_DEFAULT; -int log_level_file = LOG_LEVEL_FILE_DEFAULT; +#include "utils/configuration.h" -char *log_filename = NULL; -char *error_log_filename = NULL; -char *log_directory = NULL; -/* - * If log_path is empty logging is not initialized. 
- * We will log only into stderr - */ -char log_path[MAXPGPATH] = ""; - -/* Maximum size of an individual log file in kilobytes */ -int log_rotation_size = 0; -/* Maximum lifetime of an individual log file in minutes */ -int log_rotation_age = 0; +/* Logger parameters */ +LoggerConfig logger_config = { + LOG_LEVEL_CONSOLE_DEFAULT, + LOG_LEVEL_FILE_DEFAULT, + LOG_FILENAME_DEFAULT, + NULL, + LOG_ROTATION_SIZE_DEFAULT, + LOG_ROTATION_AGE_DEFAULT +}; /* Implementation for logging.h */ @@ -49,10 +41,10 @@ typedef enum void pg_log(eLogType type, const char *fmt,...) pg_attribute_printf(2, 3); -static void elog_internal(int elevel, bool file_only, const char *fmt, va_list args) - pg_attribute_printf(3, 0); +static void elog_internal(int elevel, bool file_only, const char *message); static void elog_stderr(int elevel, const char *fmt, ...) pg_attribute_printf(2, 3); +static char *get_log_message(const char *fmt, va_list args) pg_attribute_printf(1, 0); /* Functions to work with log files */ static void open_logfile(FILE **file, const char *filename_format); @@ -71,17 +63,56 @@ static bool loggin_in_progress = false; static pthread_mutex_t log_file_mutex = PTHREAD_MUTEX_INITIALIZER; +/* + * Initialize logger. + * + * If log_directory wasn't set by a user we use full path: + * backup_directory/log + */ void -init_logger(const char *root_path) +init_logger(const char *root_path, LoggerConfig *config) { - /* Set log path */ - if (log_level_file != LOG_OFF || error_log_filename) + /* + * If logging to file is enabled and log_directory wasn't set + * by user, init the path with default value: backup_directory/log/ + * */ + if (config->log_level_file != LOG_OFF + && config->log_directory == NULL) { - if (log_directory) - strcpy(log_path, log_directory); - else - join_path_components(log_path, root_path, LOG_DIRECTORY_DEFAULT); + config->log_directory = pgut_malloc(MAXPGPATH); + join_path_components(config->log_directory, + root_path, LOG_DIRECTORY_DEFAULT); } + + if (config->log_directory != NULL) + canonicalize_path(config->log_directory); + + logger_config = *config; + +#if PG_VERSION_NUM >= 120000 + /* Setup logging for functions from other modules called by pg_probackup */ + pg_logging_init(PROGRAM_NAME); + + switch (logger_config.log_level_console) + { + case VERBOSE: + pg_logging_set_level(PG_LOG_DEBUG); + break; + case INFO: + case NOTICE: + case LOG: + pg_logging_set_level(PG_LOG_INFO); + break; + case WARNING: + pg_logging_set_level(PG_LOG_WARNING); + break; + case ERROR: + pg_logging_set_level(PG_LOG_ERROR); + break; + default: + break; + }; +#endif } static void @@ -107,12 +138,6 @@ write_elevel(FILE *stream, int elevel) case ERROR: fputs("ERROR: ", stream); break; - case FATAL: - fputs("FATAL: ", stream); - break; - case PANIC: - fputs("PANIC: ", stream); - break; default: elog_stderr(ERROR, "invalid logging level: %d", elevel); break; @@ -129,69 +154,67 @@ exit_if_necessary(int elevel) { if (elevel > WARNING && !in_cleanup) { - /* Interrupt other possible routines */ - interrupted = true; - if (loggin_in_progress) { loggin_in_progress = false; pthread_mutex_unlock(&log_file_mutex); } + if (remote_agent) + sleep(1); /* Let parent receive sent messages */ + /* If this is not the main thread then don't call exit() */ if (main_tid != pthread_self()) + { + /* Interrupt other possible routines */ + thread_interrupted = true; #ifdef WIN32 ExitThread(elevel); #else pthread_exit(NULL); #endif + } else exit(elevel); } } /* - * Logs to stderr or to log file and exit if ERROR or FATAL. 
+ * Logs to stderr or to log file and exit if ERROR. * * Actual implementation for elog() and pg_log(). */ static void -elog_internal(int elevel, bool file_only, const char *fmt, va_list args) +elog_internal(int elevel, bool file_only, const char *message) { bool write_to_file, write_to_error_log, write_to_stderr; - va_list error_args, - std_args; time_t log_time = (time_t) time(NULL); char strfbuf[128]; + char str_pid[128]; - write_to_file = elevel >= log_level_file && log_path[0] != '\0'; - write_to_error_log = elevel >= ERROR && error_log_filename && - log_path[0] != '\0'; - write_to_stderr = elevel >= log_level_console && !file_only; + write_to_file = elevel >= logger_config.log_level_file + && logger_config.log_directory + && logger_config.log_directory[0] != '\0'; + write_to_error_log = elevel >= ERROR && logger_config.error_log_filename && + logger_config.log_directory && logger_config.log_directory[0] != '\0'; + write_to_stderr = elevel >= logger_config.log_level_console && !file_only; + if (remote_agent) + { + write_to_stderr |= write_to_error_log | write_to_file; + write_to_error_log = write_to_file = false; + } pthread_lock(&log_file_mutex); -#ifdef WIN32 - std_args = NULL; - error_args = NULL; -#endif loggin_in_progress = true; - /* We need copy args only if we need write to error log file */ - if (write_to_error_log) - va_copy(error_args, args); - /* - * We need copy args only if we need write to stderr. But do not copy args - * if we need to log only to stderr. - */ - if (write_to_stderr && write_to_file) - va_copy(std_args, args); - - if (write_to_file || write_to_error_log) + if (write_to_file || write_to_error_log || is_archive_cmd) strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", localtime(&log_time)); + snprintf(str_pid, sizeof(str_pid), "[%d]:", my_pid); + /* * Write message to log file. 
* Do not write to file if this error was raised during write previous @@ -201,17 +224,17 @@ elog_internal(int elevel, bool file_only, const char *fmt, va_list args) { if (log_file == NULL) { - if (log_filename == NULL) + if (logger_config.log_filename == NULL) open_logfile(&log_file, LOG_FILENAME_DEFAULT); else - open_logfile(&log_file, log_filename); + open_logfile(&log_file, logger_config.log_filename); } - fprintf(log_file, "%s: ", strfbuf); + fprintf(log_file, "%s ", strfbuf); + fprintf(log_file, "%s ", str_pid); write_elevel(log_file, elevel); - vfprintf(log_file, fmt, args); - fputc('\n', log_file); + fprintf(log_file, "%s\n", message); fflush(log_file); } @@ -223,16 +246,14 @@ elog_internal(int elevel, bool file_only, const char *fmt, va_list args) if (write_to_error_log) { if (error_log_file == NULL) - open_logfile(&error_log_file, error_log_filename); + open_logfile(&error_log_file, logger_config.error_log_filename); - fprintf(error_log_file, "%s: ", strfbuf); + fprintf(error_log_file, "%s ", strfbuf); + fprintf(error_log_file, "%s ", str_pid); write_elevel(error_log_file, elevel); - vfprintf(error_log_file, fmt, error_args); - fputc('\n', error_log_file); + fprintf(error_log_file, "%s\n", message); fflush(error_log_file); - - va_end(error_args); } /* @@ -241,16 +262,21 @@ elog_internal(int elevel, bool file_only, const char *fmt, va_list args) */ if (write_to_stderr) { + if (is_archive_cmd) + { + char str_thread[64]; + /* [Issue #213] fix pgbadger parsing */ + snprintf(str_thread, sizeof(str_thread), "[%d-1]:", my_thread_num); + + fprintf(stderr, "%s ", strfbuf); + fprintf(stderr, "%s ", str_pid); + fprintf(stderr, "%s ", str_thread); + } + write_elevel(stderr, elevel); - if (write_to_file) - vfprintf(stderr, fmt, std_args); - else - vfprintf(stderr, fmt, args); - fputc('\n', stderr); - fflush(stderr); - if (write_to_file) - va_end(std_args); + fprintf(stderr, "%s\n", message); + fflush(stderr); } exit_if_necessary(elevel); @@ -272,7 +298,7 @@ elog_stderr(int elevel, const char *fmt, ...) * Do not log message if severity level is less than log_level. * It is the little optimisation to put it here not in elog_internal(). */ - if (elevel < log_level_console && elevel < ERROR) + if (elevel < logger_config.log_level_console && elevel < ERROR) return; va_start(args, fmt); @@ -288,43 +314,83 @@ elog_stderr(int elevel, const char *fmt, ...) } /* - * Logs to stderr or to log file and exit if ERROR or FATAL. + * Formats text data under the control of fmt and returns it in an allocated + * buffer. + */ +static char * +get_log_message(const char *fmt, va_list args) +{ + size_t len = 256; /* initial assumption about buffer size */ + + for (;;) + { + char *result; + size_t newlen; + va_list copy_args; + + result = (char *) pgut_malloc(len); + + /* Try to format the data */ + va_copy(copy_args, args); + newlen = pvsnprintf(result, len, fmt, copy_args); + va_end(copy_args); + + if (newlen < len) + return result; /* success */ + + /* Release buffer and loop around to try again with larger len. */ + pfree(result); + len = newlen; + } +} + +/* + * Logs to stderr or to log file and exit if ERROR. */ void elog(int elevel, const char *fmt, ...) { + char *message; va_list args; /* * Do not log message if severity level is less than log_level. * It is the little optimisation to put it here not in elog_internal(). 
*/ - if (elevel < log_level_console && elevel < log_level_file && elevel < ERROR) + if (elevel < logger_config.log_level_console && + elevel < logger_config.log_level_file && elevel < ERROR) return; va_start(args, fmt); - elog_internal(elevel, false, fmt, args); + message = get_log_message(fmt, args); va_end(args); + + elog_internal(elevel, false, message); + pfree(message); } /* - * Logs only to log file and exit if ERROR or FATAL. + * Logs only to log file and exit if ERROR. */ void elog_file(int elevel, const char *fmt, ...) { + char *message; va_list args; /* * Do not log message if severity level is less than log_level. * It is the little optimisation to put it here not in elog_internal(). */ - if (elevel < log_level_file && elevel < ERROR) + if (elevel < logger_config.log_level_file && elevel < ERROR) return; va_start(args, fmt); - elog_internal(elevel, true, fmt, args); + message = get_log_message(fmt, args); va_end(args); + + elog_internal(elevel, true, message); + pfree(message); } /* @@ -333,6 +399,7 @@ elog_file(int elevel, const char *fmt, ...) void pg_log(eLogType type, const char *fmt, ...) { + char *message; va_list args; int elevel = INFO; @@ -360,12 +427,16 @@ pg_log(eLogType type, const char *fmt, ...) * Do not log message if severity level is less than log_level. * It is the little optimisation to put it here not in elog_internal(). */ - if (elevel < log_level_console && elevel < log_level_file && elevel < ERROR) + if (elevel < logger_config.log_level_console && + elevel < logger_config.log_level_file && elevel < ERROR) return; va_start(args, fmt); - elog_internal(elevel, false, fmt, args); + message = get_log_message(fmt, args); va_end(args); + + elog_internal(elevel, false, message); + pfree(message); } /* @@ -399,10 +470,6 @@ parse_log_level(const char *level) return WARNING; else if (pg_strncasecmp("error", v, len) == 0) return ERROR; - else if (pg_strncasecmp("fatal", v, len) == 0) - return FATAL; - else if (pg_strncasecmp("panic", v, len) == 0) - return PANIC; /* Log level is invalid */ elog(ERROR, "invalid log-level \"%s\"", level); @@ -431,10 +498,6 @@ deparse_log_level(int level) return "WARNING"; case ERROR: return "ERROR"; - case FATAL: - return "FATAL"; - case PANIC: - return "PANIC"; default: elog(ERROR, "invalid log-level %d", level); } @@ -454,17 +517,22 @@ logfile_getname(const char *format, time_t timestamp) size_t len; struct tm *tm = localtime(×tamp); - if (log_path[0] == '\0') + if (logger_config.log_directory == NULL || + logger_config.log_directory[0] == '\0') elog_stderr(ERROR, "logging path is not set"); - filename = (char *) palloc(MAXPGPATH); + filename = (char *) pgut_malloc(MAXPGPATH); - snprintf(filename, MAXPGPATH, "%s/", log_path); + snprintf(filename, MAXPGPATH, "%s/", logger_config.log_directory); len = strlen(filename); /* Treat log_filename as a strftime pattern */ +#ifdef WIN32 + if (pg_strftime(filename + len, MAXPGPATH - len, format, tm) <= 0) +#else if (strftime(filename + len, MAXPGPATH - len, format, tm) <= 0) +#endif elog_stderr(ERROR, "strftime(%s) failed: %s", format, strerror(errno)); return filename; @@ -481,7 +549,7 @@ logfile_open(const char *filename, const char *mode) /* * Create log directory if not present; ignore errors */ - mkdir(log_path, S_IRWXU); + mkdir(logger_config.log_directory, S_IRWXU); fh = fopen(filename, mode); @@ -491,7 +559,7 @@ logfile_open(const char *filename, const char *mode) { int save_errno = errno; - elog_stderr(FATAL, "could not open log file \"%s\": %s", + elog_stderr(ERROR, "could not open log 
file \"%s\": %s", filename, strerror(errno)); errno = save_errno; } @@ -511,11 +579,12 @@ open_logfile(FILE **file, const char *filename_format) FILE *control_file; time_t cur_time = time(NULL); bool rotation_requested = false, - logfile_exists = false; + logfile_exists = false, + rotation_file_exists = false; filename = logfile_getname(filename_format, cur_time); - /* "log_path" was checked in logfile_getname() */ + /* "log_directory" was checked in logfile_getname() */ snprintf(control, MAXPGPATH, "%s.rotation", filename); if (stat(filename, &st) == -1) @@ -533,21 +602,27 @@ open_logfile(FILE **file, const char *filename_format) logfile_exists = true; /* First check for rotation */ - if (log_rotation_size > 0 || log_rotation_age > 0) + if (logger_config.log_rotation_size > 0 || + logger_config.log_rotation_age > 0) { /* Check for rotation by age */ - if (log_rotation_age > 0) + if (logger_config.log_rotation_age > 0) { struct stat control_st; - if (stat(control, &control_st) == -1) + if (stat(control, &control_st) < 0) { - if (errno != ENOENT) + if (errno == ENOENT) + /* '.rotation' file is not found, force its recreation */ + elog_stderr(WARNING, "missing rotation file: \"%s\"", + control); + else elog_stderr(ERROR, "cannot stat rotation file \"%s\": %s", control, strerror(errno)); } else { + /* rotation file exists */ char buf[1024]; control_file = fopen(control, "r"); @@ -555,33 +630,45 @@ open_logfile(FILE **file, const char *filename_format) elog_stderr(ERROR, "cannot open rotation file \"%s\": %s", control, strerror(errno)); + rotation_file_exists = true; + if (fgets(buf, lengthof(buf), control_file)) { time_t creation_time; if (!parse_int64(buf, (int64 *) &creation_time, 0)) - elog_stderr(ERROR, "rotation file \"%s\" has wrong " + { + /* Inability to parse value from .rotation file is + * concerning but not a critical error + */ + elog_stderr(WARNING, "rotation file \"%s\" has wrong " "creation timestamp \"%s\"", control, buf); - /* Parsed creation time */ - - rotation_requested = (cur_time - creation_time) > - /* convert to seconds */ - log_rotation_age * 60; + rotation_file_exists = false; + } + else + /* Parsed creation time */ + rotation_requested = (cur_time - creation_time) > + /* convert to seconds from milliseconds */ + logger_config.log_rotation_age / 1000; } else - elog_stderr(ERROR, "cannot read creation timestamp from " + { + /* truncated .rotation file is not a critical error */ + elog_stderr(WARNING, "cannot read creation timestamp from " "rotation file \"%s\"", control); + rotation_file_exists = false; + } fclose(control_file); } } /* Check for rotation by size */ - if (!rotation_requested && log_rotation_size > 0) + if (!rotation_requested && logger_config.log_rotation_size > 0) rotation_requested = st.st_size >= /* convert to bytes */ - log_rotation_size * 1024L; + logger_config.log_rotation_size * 1024L; } logfile_open: @@ -592,7 +679,7 @@ open_logfile(FILE **file, const char *filename_format) pfree(filename); /* Rewrite rotation control file */ - if (rotation_requested || !logfile_exists) + if (rotation_requested || !logfile_exists || !rotation_file_exists) { time_t timestamp = time(NULL); diff --git a/src/utils/logger.h b/src/utils/logger.h index 0177c5511..37b6ff095 100644 --- a/src/utils/logger.h +++ b/src/utils/logger.h @@ -2,7 +2,7 @@ * * logger.h: - prototypes of logger functions. 
* - * Copyright (c) 2017-2017, Postgres Professional + * Copyright (c) 2017-2019, Postgres Professional * *------------------------------------------------------------------------- */ @@ -10,8 +10,6 @@ #ifndef LOGGER_H #define LOGGER_H -#include "postgres_fe.h" - #define LOG_NONE (-10) /* Log level */ @@ -21,34 +19,38 @@ #define NOTICE (-2) #define WARNING (-1) #define ERROR 1 -#define FATAL 2 -#define PANIC 3 #define LOG_OFF 10 -/* Logger parameters */ +typedef struct LoggerConfig +{ + int log_level_console; + int log_level_file; + char *log_filename; + char *error_log_filename; + char *log_directory; + /* Maximum size of an individual log file in kilobytes */ + uint64 log_rotation_size; + /* Maximum lifetime of an individual log file in minutes */ + uint64 log_rotation_age; +} LoggerConfig; -extern int log_to_file; -extern int log_level_console; -extern int log_level_file; +/* Logger parameters */ +extern LoggerConfig logger_config; -extern char *log_filename; -extern char *error_log_filename; -extern char *log_directory; -extern char log_path[MAXPGPATH]; +#define LOG_ROTATION_SIZE_DEFAULT 0 +#define LOG_ROTATION_AGE_DEFAULT 0 -#define LOG_ROTATION_SIZE_DEFAULT 0 -#define LOG_ROTATION_AGE_DEFAULT 0 -extern int log_rotation_size; -extern int log_rotation_age; +#define LOG_LEVEL_CONSOLE_DEFAULT INFO +#define LOG_LEVEL_FILE_DEFAULT LOG_OFF -#define LOG_LEVEL_CONSOLE_DEFAULT INFO -#define LOG_LEVEL_FILE_DEFAULT LOG_OFF +#define LOG_FILENAME_DEFAULT "pg_probackup.log" +#define LOG_DIRECTORY_DEFAULT "log" #undef elog extern void elog(int elevel, const char *fmt, ...) pg_attribute_printf(2, 3); extern void elog_file(int elevel, const char *fmt, ...) pg_attribute_printf(2, 3); -extern void init_logger(const char *root_path); +extern void init_logger(const char *root_path, LoggerConfig *config); extern int parse_log_level(const char *level); extern const char *deparse_log_level(int level); diff --git a/src/utils/parray.c b/src/utils/parray.c index a9ba7c8e5..31148ee9a 100644 --- a/src/utils/parray.c +++ b/src/utils/parray.c @@ -7,12 +7,15 @@ *------------------------------------------------------------------------- */ -#include "src/pg_probackup.h" +#include "postgres_fe.h" + +#include "parray.h" +#include "pgut.h" /* members of struct parray are hidden from client. */ struct parray { - void **data; /* poiter array, expanded if necessary */ + void **data; /* pointer array, expanded if necessary */ size_t alloced; /* number of elements allocated */ size_t used; /* number of elements in use */ }; @@ -94,7 +97,7 @@ parray_insert(parray *array, size_t index, void *elem) } /* - * Concatinate two parray. + * Concatenate two parray. * parray_concat() appends the copy of the content of src to the end of dest. 
*/ parray * @@ -194,3 +197,16 @@ parray_bsearch(parray *array, const void *key, int(*compare)(const void *, const { return bsearch(&key, array->data, array->used, sizeof(void *), compare); } + +/* checks that parray contains element */ +bool parray_contains(parray *array, void *elem) +{ + int i; + + for (i = 0; i < parray_num(array); i++) + { + if (parray_get(array, i) == elem) + return true; + } + return false; +} diff --git a/src/utils/parray.h b/src/utils/parray.h index 833a6961b..85d7383f3 100644 --- a/src/utils/parray.h +++ b/src/utils/parray.h @@ -30,6 +30,7 @@ extern size_t parray_num(const parray *array); extern void parray_qsort(parray *array, int(*compare)(const void *, const void *)); extern void *parray_bsearch(parray *array, const void *key, int(*compare)(const void *, const void *)); extern void parray_walk(parray *array, void (*action)(void *)); +extern bool parray_contains(parray *array, void *elem); #endif /* PARRAY_H */ diff --git a/src/utils/pgut.c b/src/utils/pgut.c index 37f249a29..6d996f47f 100644 --- a/src/utils/pgut.c +++ b/src/utils/pgut.c @@ -3,1327 +3,56 @@ * pgut.c * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2017-2017, Postgres Professional + * Portions Copyright (c) 2017-2019, Postgres Professional * *------------------------------------------------------------------------- */ +#include "pg_probackup.h" #include "postgres_fe.h" -#include "libpq/pqsignal.h" - -#include "getopt_long.h" -#include -#include -#include - -#include "logger.h" -#include "pgut.h" - -/* old gcc doesn't have LLONG_MAX. */ -#ifndef LLONG_MAX -#if defined(HAVE_LONG_INT_64) || !defined(HAVE_LONG_LONG_INT_64) -#define LLONG_MAX LONG_MAX -#else -#define LLONG_MAX INT64CONST(0x7FFFFFFFFFFFFFFF) -#endif -#endif - -#define MAX_TZDISP_HOUR 15 /* maximum allowed hour part */ -#define SECS_PER_MINUTE 60 -#define MINS_PER_HOUR 60 -#define MAXPG_LSNCOMPONENT 8 - -const char *PROGRAM_NAME = NULL; - -const char *pgut_dbname = NULL; -const char *host = NULL; -const char *port = NULL; -const char *username = NULL; -static char *password = NULL; -bool prompt_password = true; -bool force_password = false; - -/* Database connections */ -static PGcancel *volatile cancel_conn = NULL; - -/* Interrupted by SIGINT (Ctrl+C) ? */ -bool interrupted = false; -bool in_cleanup = false; -bool in_password = false; - -static bool parse_pair(const char buffer[], char key[], char value[]); - -/* Connection routines */ -static void init_cancel_handler(void); -static void on_before_exec(PGconn *conn, PGcancel *thread_cancel_conn); -static void on_after_exec(PGcancel *thread_cancel_conn); -static void on_interrupt(void); -static void on_cleanup(void); -static void exit_or_abort(int exitcode); -static const char *get_username(void); -static pqsigfunc oldhandler = NULL; - -/* - * Unit conversion tables. - * - * Copied from guc.c. - */ -#define MAX_UNIT_LEN 3 /* length of longest recognized unit string */ - -typedef struct -{ - char unit[MAX_UNIT_LEN + 1]; /* unit, as a string, like "kB" or - * "min" */ - int base_unit; /* OPTION_UNIT_XXX */ - int multiplier; /* If positive, multiply the value with this - * for unit -> base_unit conversion. 
If - * negative, divide (with the absolute value) */ -} unit_conversion; - -static const char *memory_units_hint = "Valid units for this parameter are \"kB\", \"MB\", \"GB\", and \"TB\"."; - -static const unit_conversion memory_unit_conversion_table[] = -{ - {"TB", OPTION_UNIT_KB, 1024 * 1024 * 1024}, - {"GB", OPTION_UNIT_KB, 1024 * 1024}, - {"MB", OPTION_UNIT_KB, 1024}, - {"KB", OPTION_UNIT_KB, 1}, - {"kB", OPTION_UNIT_KB, 1}, - - {"TB", OPTION_UNIT_BLOCKS, (1024 * 1024 * 1024) / (BLCKSZ / 1024)}, - {"GB", OPTION_UNIT_BLOCKS, (1024 * 1024) / (BLCKSZ / 1024)}, - {"MB", OPTION_UNIT_BLOCKS, 1024 / (BLCKSZ / 1024)}, - {"kB", OPTION_UNIT_BLOCKS, -(BLCKSZ / 1024)}, - - {"TB", OPTION_UNIT_XBLOCKS, (1024 * 1024 * 1024) / (XLOG_BLCKSZ / 1024)}, - {"GB", OPTION_UNIT_XBLOCKS, (1024 * 1024) / (XLOG_BLCKSZ / 1024)}, - {"MB", OPTION_UNIT_XBLOCKS, 1024 / (XLOG_BLCKSZ / 1024)}, - {"kB", OPTION_UNIT_XBLOCKS, -(XLOG_BLCKSZ / 1024)}, - - {"TB", OPTION_UNIT_XSEGS, (1024 * 1024 * 1024) / (XLOG_SEG_SIZE / 1024)}, - {"GB", OPTION_UNIT_XSEGS, (1024 * 1024) / (XLOG_SEG_SIZE / 1024)}, - {"MB", OPTION_UNIT_XSEGS, -(XLOG_SEG_SIZE / (1024 * 1024))}, - {"kB", OPTION_UNIT_XSEGS, -(XLOG_SEG_SIZE / 1024)}, - - {""} /* end of table marker */ -}; - -static const char *time_units_hint = "Valid units for this parameter are \"ms\", \"s\", \"min\", \"h\", and \"d\"."; - -static const unit_conversion time_unit_conversion_table[] = -{ - {"d", OPTION_UNIT_MS, 1000 * 60 * 60 * 24}, - {"h", OPTION_UNIT_MS, 1000 * 60 * 60}, - {"min", OPTION_UNIT_MS, 1000 * 60}, - {"s", OPTION_UNIT_MS, 1000}, - {"ms", OPTION_UNIT_MS, 1}, - - {"d", OPTION_UNIT_S, 60 * 60 * 24}, - {"h", OPTION_UNIT_S, 60 * 60}, - {"min", OPTION_UNIT_S, 60}, - {"s", OPTION_UNIT_S, 1}, - {"ms", OPTION_UNIT_S, -1000}, - - {"d", OPTION_UNIT_MIN, 60 * 24}, - {"h", OPTION_UNIT_MIN, 60}, - {"min", OPTION_UNIT_MIN, 1}, - {"s", OPTION_UNIT_MIN, -60}, - {"ms", OPTION_UNIT_MIN, -1000 * 60}, - - {""} /* end of table marker */ -}; - -static size_t -option_length(const pgut_option opts[]) -{ - size_t len; - - for (len = 0; opts && opts[len].type; len++) { } - - return len; -} - -static int -option_has_arg(char type) -{ - switch (type) - { - case 'b': - case 'B': - return no_argument; - default: - return required_argument; - } -} - -static void -option_copy(struct option dst[], const pgut_option opts[], size_t len) -{ - size_t i; - - for (i = 0; i < len; i++) - { - dst[i].name = opts[i].lname; - dst[i].has_arg = option_has_arg(opts[i].type); - dst[i].flag = NULL; - dst[i].val = opts[i].sname; - } -} - -static pgut_option * -option_find(int c, pgut_option opts1[]) -{ - size_t i; - - for (i = 0; opts1 && opts1[i].type; i++) - if (opts1[i].sname == c) - return &opts1[i]; - - return NULL; /* not found */ -} - -static void -assign_option(pgut_option *opt, const char *optarg, pgut_optsrc src) -{ - const char *message; - - if (opt == NULL) - { - fprintf(stderr, "Try \"%s --help\" for more information.\n", PROGRAM_NAME); - exit_or_abort(ERROR); - } - - if (opt->source > src) - { - /* high prior value has been set already. 
*/ - return; - } - /* Allow duplicate entries for function option */ - else if (src >= SOURCE_CMDLINE && opt->source >= src && opt->type != 'f') - { - message = "specified only once"; - } - else - { - pgut_optsrc orig_source = opt->source; - - /* can be overwritten if non-command line source */ - opt->source = src; - - switch (opt->type) - { - case 'b': - case 'B': - if (optarg == NULL) - { - *((bool *) opt->var) = (opt->type == 'b'); - return; - } - else if (parse_bool(optarg, (bool *) opt->var)) - { - return; - } - message = "a boolean"; - break; - case 'f': - ((pgut_optfn) opt->var)(opt, optarg); - return; - case 'i': - if (parse_int32(optarg, opt->var, opt->flags)) - return; - message = "a 32bit signed integer"; - break; - case 'u': - if (parse_uint32(optarg, opt->var, opt->flags)) - return; - message = "a 32bit unsigned integer"; - break; - case 'I': - if (parse_int64(optarg, opt->var, opt->flags)) - return; - message = "a 64bit signed integer"; - break; - case 'U': - if (parse_uint64(optarg, opt->var, opt->flags)) - return; - message = "a 64bit unsigned integer"; - break; - case 's': - if (orig_source != SOURCE_DEFAULT) - free(*(char **) opt->var); - *(char **) opt->var = pgut_strdup(optarg); - if (strcmp(optarg,"") != 0) - return; - message = "a valid string. But provided: "; - break; - case 't': - if (parse_time(optarg, opt->var, - opt->source == SOURCE_FILE)) - return; - message = "a time"; - break; - default: - elog(ERROR, "invalid option type: %c", opt->type); - return; /* keep compiler quiet */ - } - } - - if (isprint(opt->sname)) - elog(ERROR, "option -%c, --%s should be %s: '%s'", - opt->sname, opt->lname, message, optarg); - else - elog(ERROR, "option --%s should be %s: '%s'", - opt->lname, message, optarg); -} - -/* - * Convert a value from one of the human-friendly units ("kB", "min" etc.) - * to the given base unit. 'value' and 'unit' are the input value and unit - * to convert from. The converted value is stored in *base_value. - * - * Returns true on success, false if the input unit is not recognized. - */ -static bool -convert_to_base_unit(int64 value, const char *unit, - int base_unit, int64 *base_value) -{ - const unit_conversion *table; - int i; - - if (base_unit & OPTION_UNIT_MEMORY) - table = memory_unit_conversion_table; - else - table = time_unit_conversion_table; - - for (i = 0; *table[i].unit; i++) - { - if (base_unit == table[i].base_unit && - strcmp(unit, table[i].unit) == 0) - { - if (table[i].multiplier < 0) - *base_value = value / (-table[i].multiplier); - else - *base_value = value * table[i].multiplier; - return true; - } - } - return false; -} - -/* - * Unsigned variant of convert_to_base_unit() - */ -static bool -convert_to_base_unit_u(uint64 value, const char *unit, - int base_unit, uint64 *base_value) -{ - const unit_conversion *table; - int i; - - if (base_unit & OPTION_UNIT_MEMORY) - table = memory_unit_conversion_table; - else - table = time_unit_conversion_table; - - for (i = 0; *table[i].unit; i++) - { - if (base_unit == table[i].base_unit && - strcmp(unit, table[i].unit) == 0) - { - if (table[i].multiplier < 0) - *base_value = value / (-table[i].multiplier); - else - *base_value = value * table[i].multiplier; - return true; - } - } - return false; -} - -/* - * Convert a value in some base unit to a human-friendly unit. The output - * unit is chosen so that it's the greatest unit that can represent the value - * without loss. For example, if the base unit is GUC_UNIT_KB, 1024 is - * converted to 1 MB, but 1025 is represented as 1025 kB. 
- */ -void -convert_from_base_unit(int64 base_value, int base_unit, - int64 *value, const char **unit) -{ - const unit_conversion *table; - int i; - - *unit = NULL; - - if (base_unit & OPTION_UNIT_MEMORY) - table = memory_unit_conversion_table; - else - table = time_unit_conversion_table; - - for (i = 0; *table[i].unit; i++) - { - if (base_unit == table[i].base_unit) - { - /* - * Accept the first conversion that divides the value evenly. We - * assume that the conversions for each base unit are ordered from - * greatest unit to the smallest! - */ - if (table[i].multiplier < 0) - { - *value = base_value * (-table[i].multiplier); - *unit = table[i].unit; - break; - } - else if (base_value % table[i].multiplier == 0) - { - *value = base_value / table[i].multiplier; - *unit = table[i].unit; - break; - } - } - } - - Assert(*unit != NULL); -} - -/* - * Unsigned variant of convert_from_base_unit() - */ -void -convert_from_base_unit_u(uint64 base_value, int base_unit, - uint64 *value, const char **unit) -{ - const unit_conversion *table; - int i; - - *unit = NULL; - - if (base_unit & OPTION_UNIT_MEMORY) - table = memory_unit_conversion_table; - else - table = time_unit_conversion_table; - - for (i = 0; *table[i].unit; i++) - { - if (base_unit == table[i].base_unit) - { - /* - * Accept the first conversion that divides the value evenly. We - * assume that the conversions for each base unit are ordered from - * greatest unit to the smallest! - */ - if (table[i].multiplier < 0) - { - *value = base_value * (-table[i].multiplier); - *unit = table[i].unit; - break; - } - else if (base_value % table[i].multiplier == 0) - { - *value = base_value / table[i].multiplier; - *unit = table[i].unit; - break; - } - } - } - - Assert(*unit != NULL); -} - -static bool -parse_unit(char *unit_str, int flags, int64 value, int64 *base_value) -{ - /* allow whitespace between integer and unit */ - while (isspace((unsigned char) *unit_str)) - unit_str++; - - /* Handle possible unit */ - if (*unit_str != '\0') - { - char unit[MAX_UNIT_LEN + 1]; - int unitlen; - bool converted = false; - - if ((flags & OPTION_UNIT) == 0) - return false; /* this setting does not accept a unit */ - - unitlen = 0; - while (*unit_str != '\0' && !isspace((unsigned char) *unit_str) && - unitlen < MAX_UNIT_LEN) - unit[unitlen++] = *(unit_str++); - unit[unitlen] = '\0'; - /* allow whitespace after unit */ - while (isspace((unsigned char) *unit_str)) - unit_str++; - - if (*unit_str == '\0') - converted = convert_to_base_unit(value, unit, (flags & OPTION_UNIT), - base_value); - if (!converted) - return false; - } - - return true; -} - -/* - * Unsigned variant of parse_unit() - */ -static bool -parse_unit_u(char *unit_str, int flags, uint64 value, uint64 *base_value) -{ - /* allow whitespace between integer and unit */ - while (isspace((unsigned char) *unit_str)) - unit_str++; - - /* Handle possible unit */ - if (*unit_str != '\0') - { - char unit[MAX_UNIT_LEN + 1]; - int unitlen; - bool converted = false; - - if ((flags & OPTION_UNIT) == 0) - return false; /* this setting does not accept a unit */ - - unitlen = 0; - while (*unit_str != '\0' && !isspace((unsigned char) *unit_str) && - unitlen < MAX_UNIT_LEN) - unit[unitlen++] = *(unit_str++); - unit[unitlen] = '\0'; - /* allow whitespace after unit */ - while (isspace((unsigned char) *unit_str)) - unit_str++; - - if (*unit_str == '\0') - converted = convert_to_base_unit_u(value, unit, (flags & OPTION_UNIT), - base_value); - if (!converted) - return false; - } - - return true; -} - -/* - * Try to 
interpret value as boolean value. Valid values are: true, - * false, yes, no, on, off, 1, 0; as well as unique prefixes thereof. - * If the string parses okay, return true, else false. - * If okay and result is not NULL, return the value in *result. - */ -bool -parse_bool(const char *value, bool *result) -{ - return parse_bool_with_len(value, strlen(value), result); -} - -bool -parse_bool_with_len(const char *value, size_t len, bool *result) -{ - switch (*value) - { - case 't': - case 'T': - if (pg_strncasecmp(value, "true", len) == 0) - { - if (result) - *result = true; - return true; - } - break; - case 'f': - case 'F': - if (pg_strncasecmp(value, "false", len) == 0) - { - if (result) - *result = false; - return true; - } - break; - case 'y': - case 'Y': - if (pg_strncasecmp(value, "yes", len) == 0) - { - if (result) - *result = true; - return true; - } - break; - case 'n': - case 'N': - if (pg_strncasecmp(value, "no", len) == 0) - { - if (result) - *result = false; - return true; - } - break; - case 'o': - case 'O': - /* 'o' is not unique enough */ - if (pg_strncasecmp(value, "on", (len > 2 ? len : 2)) == 0) - { - if (result) - *result = true; - return true; - } - else if (pg_strncasecmp(value, "off", (len > 2 ? len : 2)) == 0) - { - if (result) - *result = false; - return true; - } - break; - case '1': - if (len == 1) - { - if (result) - *result = true; - return true; - } - break; - case '0': - if (len == 1) - { - if (result) - *result = false; - return true; - } - break; - default: - break; - } - - if (result) - *result = false; /* suppress compiler warning */ - return false; -} - -/* - * Parse string as 32bit signed int. - * valid range: -2147483648 ~ 2147483647 - */ -bool -parse_int32(const char *value, int32 *result, int flags) -{ - int64 val; - char *endptr; - - if (strcmp(value, INFINITE_STR) == 0) - { - *result = INT_MAX; - return true; - } - - errno = 0; - val = strtol(value, &endptr, 0); - if (endptr == value || (*endptr && flags == 0)) - return false; - - if (errno == ERANGE || val != (int64) ((int32) val)) - return false; - - if (!parse_unit(endptr, flags, val, &val)) - return false; - - *result = val; - - return true; -} - -/* - * Parse string as 32bit unsigned int. 
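/*
 * Stand-alone sketch, not from the patch, of the strtol/endptr idiom used by
 * parse_int32() above: base 0 accepts decimal, octal and hex input, trailing
 * garbage is rejected, and the value must survive a round trip through int32.
 * Unit-suffix handling is omitted; demo_parse_int32 is a hypothetical name.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int
demo_parse_int32(const char *value, int32_t *result)
{
	char	   *endptr;
	long long	val;

	errno = 0;
	val = strtoll(value, &endptr, 0);

	if (endptr == value || *endptr != '\0')
		return 0;				/* empty string or trailing garbage */
	if (errno == ERANGE || val != (long long) (int32_t) val)
		return 0;				/* does not fit into 32 bits */

	*result = (int32_t) val;
	return 1;
}

int
main(void)
{
	int32_t		v;

	printf("%d\n", demo_parse_int32("0x7fffffff", &v));	/* 1: hex accepted */
	printf("%d\n", demo_parse_int32("2147483648", &v));	/* 0: overflows int32 */
	return 0;
}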
- * valid range: 0 ~ 4294967295 (2^32-1) - */ -bool -parse_uint32(const char *value, uint32 *result, int flags) -{ - uint64 val; - char *endptr; - - if (strcmp(value, INFINITE_STR) == 0) - { - *result = UINT_MAX; - return true; - } - - errno = 0; - val = strtoul(value, &endptr, 0); - if (endptr == value || (*endptr && flags == 0)) - return false; - - if (errno == ERANGE || val != (uint64) ((uint32) val)) - return false; - - if (!parse_unit_u(endptr, flags, val, &val)) - return false; - - *result = val; - - return true; -} - -/* - * Parse string as int64 - * valid range: -9223372036854775808 ~ 9223372036854775807 - */ -bool -parse_int64(const char *value, int64 *result, int flags) -{ - int64 val; - char *endptr; - - if (strcmp(value, INFINITE_STR) == 0) - { - *result = LLONG_MAX; - return true; - } - - errno = 0; -#if defined(HAVE_LONG_INT_64) - val = strtol(value, &endptr, 0); -#elif defined(HAVE_LONG_LONG_INT_64) - val = strtoll(value, &endptr, 0); -#else - val = strtol(value, &endptr, 0); -#endif - if (endptr == value || (*endptr && flags == 0)) - return false; - - if (errno == ERANGE) - return false; - - if (!parse_unit(endptr, flags, val, &val)) - return false; - - *result = val; - - return true; -} - -/* - * Parse string as uint64 - * valid range: 0 ~ (2^64-1) - */ -bool -parse_uint64(const char *value, uint64 *result, int flags) -{ - uint64 val; - char *endptr; - - if (strcmp(value, INFINITE_STR) == 0) - { -#if defined(HAVE_LONG_INT_64) - *result = ULONG_MAX; -#elif defined(HAVE_LONG_LONG_INT_64) - *result = ULLONG_MAX; -#else - *result = ULONG_MAX; -#endif - return true; - } - - errno = 0; -#if defined(HAVE_LONG_INT_64) - val = strtoul(value, &endptr, 0); -#elif defined(HAVE_LONG_LONG_INT_64) - val = strtoull(value, &endptr, 0); -#else - val = strtoul(value, &endptr, 0); -#endif - if (endptr == value || (*endptr && flags == 0)) - return false; - - if (errno == ERANGE) - return false; - - if (!parse_unit_u(endptr, flags, val, &val)) - return false; - - *result = val; - - return true; -} - -/* - * Convert ISO-8601 format string to time_t value. - * - * If utc_default is true, then if timezone offset isn't specified tz will be - * +00:00. - */ -bool -parse_time(const char *value, time_t *result, bool utc_default) -{ - size_t len; - int fields_num, - tz = 0, - i; - bool tz_set = false; - char *tmp; - struct tm tm; - char junk[2]; - - /* tmp = replace( value, !isalnum, ' ' ) */ - tmp = pgut_malloc(strlen(value) + + 1); - len = 0; - fields_num = 1; - - while (*value) - { - if (IsAlnum(*value)) - { - tmp[len++] = *value; - value++; - } - else if (fields_num < 6) - { - fields_num++; - tmp[len++] = ' '; - value++; - } - /* timezone field is 7th */ - else if ((*value == '-' || *value == '+') && fields_num == 6) - { - int hr, - min, - sec = 0; - char *cp; - - errno = 0; - hr = strtol(value + 1, &cp, 10); - if ((value + 1) == cp || errno == ERANGE) - return false; - - /* explicit delimiter? */ - if (*cp == ':') - { - errno = 0; - min = strtol(cp + 1, &cp, 10); - if (errno == ERANGE) - return false; - if (*cp == ':') - { - errno = 0; - sec = strtol(cp + 1, &cp, 10); - if (errno == ERANGE) - return false; - } - } - /* otherwise, might have run things together... 
*/ - else if (*cp == '\0' && strlen(value) > 3) - { - min = hr % 100; - hr = hr / 100; - /* we could, but don't, support a run-together hhmmss format */ - } - else - min = 0; - - /* Range-check the values; see notes in datatype/timestamp.h */ - if (hr < 0 || hr > MAX_TZDISP_HOUR) - return false; - if (min < 0 || min >= MINS_PER_HOUR) - return false; - if (sec < 0 || sec >= SECS_PER_MINUTE) - return false; - - tz = (hr * MINS_PER_HOUR + min) * SECS_PER_MINUTE + sec; - if (*value == '-') - tz = -tz; - - tz_set = true; - - fields_num++; - value = cp; - } - /* wrong format */ - else if (!IsSpace(*value)) - return false; - } - tmp[len] = '\0'; - - /* parse for "YYYY-MM-DD HH:MI:SS" */ - memset(&tm, 0, sizeof(tm)); - tm.tm_year = 0; /* tm_year is year - 1900 */ - tm.tm_mon = 0; /* tm_mon is 0 - 11 */ - tm.tm_mday = 1; /* tm_mday is 1 - 31 */ - tm.tm_hour = 0; - tm.tm_min = 0; - tm.tm_sec = 0; - i = sscanf(tmp, "%04d %02d %02d %02d %02d %02d%1s", - &tm.tm_year, &tm.tm_mon, &tm.tm_mday, - &tm.tm_hour, &tm.tm_min, &tm.tm_sec, junk); - free(tmp); - - if (i < 1 || 6 < i) - return false; - - /* adjust year */ - if (tm.tm_year < 100) - tm.tm_year += 2000 - 1900; - else if (tm.tm_year >= 1900) - tm.tm_year -= 1900; - - /* adjust month */ - if (i > 1) - tm.tm_mon -= 1; - - /* determine whether Daylight Saving Time is in effect */ - tm.tm_isdst = -1; - - *result = mktime(&tm); - - /* adjust time zone */ - if (tz_set || utc_default) - { - time_t ltime = time(NULL); - struct tm *ptm = gmtime(<ime); - time_t gmt = mktime(ptm); - time_t offset; - - /* UTC time */ - *result -= tz; - - /* Get local time */ - ptm = localtime(<ime); - offset = ltime - gmt + (ptm->tm_isdst ? 3600 : 0); - - *result += offset; - } - - return true; -} - -/* - * Try to parse value as an integer. The accepted formats are the - * usual decimal, octal, or hexadecimal formats, optionally followed by - * a unit name if "flags" indicates a unit is allowed. - * - * If the string parses okay, return true, else false. - * If okay and result is not NULL, return the value in *result. - * If not okay and hintmsg is not NULL, *hintmsg is set to a suitable - * HINT message, or NULL if no hint provided. - */ -bool -parse_int(const char *value, int *result, int flags, const char **hintmsg) -{ - int64 val; - char *endptr; - - /* To suppress compiler warnings, always set output params */ - if (result) - *result = 0; - if (hintmsg) - *hintmsg = NULL; - - /* We assume here that int64 is at least as wide as long */ - errno = 0; - val = strtol(value, &endptr, 0); - - if (endptr == value) - return false; /* no HINT for integer syntax error */ - - if (errno == ERANGE || val != (int64) ((int32) val)) - { - if (hintmsg) - *hintmsg = "Value exceeds integer range."; - return false; - } - - /* allow whitespace between integer and unit */ - while (isspace((unsigned char) *endptr)) - endptr++; - - /* Handle possible unit */ - if (*endptr != '\0') - { - char unit[MAX_UNIT_LEN + 1]; - int unitlen; - bool converted = false; - - if ((flags & OPTION_UNIT) == 0) - return false; /* this setting does not accept a unit */ - - unitlen = 0; - while (*endptr != '\0' && !isspace((unsigned char) *endptr) && - unitlen < MAX_UNIT_LEN) - unit[unitlen++] = *(endptr++); - unit[unitlen] = '\0'; - /* allow whitespace after unit */ - while (isspace((unsigned char) *endptr)) - endptr++; - - if (*endptr == '\0') - converted = convert_to_base_unit(val, unit, (flags & OPTION_UNIT), - &val); - if (!converted) - { - /* invalid unit, or garbage after the unit; set hint and fail. 
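/*
 * Illustrative sketch, not from the patch, of the sscanf + mktime step that
 * parse_time() above performs once the input has been normalized to
 * "YYYY MM DD HH MI SS": fill a struct tm, adjust the year and month offsets,
 * and let mktime() resolve daylight saving time. The sample timestamp is
 * arbitrary.
 */
#include <stdio.h>
#include <string.h>
#include <time.h>

int
main(void)
{
	const char *normalized = "2019 01 01 10 00 00";
	struct tm	tm;
	time_t		t;

	memset(&tm, 0, sizeof(tm));
	tm.tm_mday = 1;				/* same default as parse_time() */

	if (sscanf(normalized, "%04d %02d %02d %02d %02d %02d",
			   &tm.tm_year, &tm.tm_mon, &tm.tm_mday,
			   &tm.tm_hour, &tm.tm_min, &tm.tm_sec) < 1)
		return 1;

	tm.tm_year -= 1900;			/* tm_year counts from 1900 */
	tm.tm_mon -= 1;				/* tm_mon is 0..11 */
	tm.tm_isdst = -1;			/* let mktime() decide on DST */

	t = mktime(&tm);
	printf("%s", ctime(&t));	/* local-time interpretation */
	return 0;
}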
*/ - if (hintmsg) - { - if (flags & OPTION_UNIT_MEMORY) - *hintmsg = memory_units_hint; - else - *hintmsg = time_units_hint; - } - return false; - } - - /* Check for overflow due to units conversion */ - if (val != (int64) ((int32) val)) - { - if (hintmsg) - *hintmsg = "Value exceeds integer range."; - return false; - } - } - - if (result) - *result = (int) val; - return true; -} - -bool -parse_lsn(const char *value, XLogRecPtr *result) -{ - uint32 xlogid; - uint32 xrecoff; - int len1; - int len2; - - len1 = strspn(value, "0123456789abcdefABCDEF"); - if (len1 < 1 || len1 > MAXPG_LSNCOMPONENT || value[len1] != '/') - elog(ERROR, "invalid LSN \"%s\"", value); - len2 = strspn(value + len1 + 1, "0123456789abcdefABCDEF"); - if (len2 < 1 || len2 > MAXPG_LSNCOMPONENT || value[len1 + 1 + len2] != '\0') - elog(ERROR, "invalid LSN \"%s\"", value); - - if (sscanf(value, "%X/%X", &xlogid, &xrecoff) == 2) - *result = (XLogRecPtr) ((uint64) xlogid << 32) | xrecoff; - else - { - elog(ERROR, "invalid LSN \"%s\"", value); - return false; - } - - return true; -} - -static char * -longopts_to_optstring(const struct option opts[], const size_t len) -{ - size_t i; - char *result; - char *s; - - result = pgut_malloc(len * 2 + 1); - - s = result; - for (i = 0; i < len; i++) - { - if (!isprint(opts[i].val)) - continue; - *s++ = opts[i].val; - if (opts[i].has_arg != no_argument) - *s++ = ':'; - } - *s = '\0'; - - return result; -} - -void -pgut_getopt_env(pgut_option options[]) -{ - size_t i; - - for (i = 0; options && options[i].type; i++) - { - pgut_option *opt = &options[i]; - const char *value = NULL; - - /* If option was already set do not check env */ - if (opt->source > SOURCE_ENV || opt->allowed < SOURCE_ENV) - continue; - - if (strcmp(opt->lname, "pgdata") == 0) - value = getenv("PGDATA"); - if (strcmp(opt->lname, "port") == 0) - value = getenv("PGPORT"); - if (strcmp(opt->lname, "host") == 0) - value = getenv("PGHOST"); - if (strcmp(opt->lname, "username") == 0) - value = getenv("PGUSER"); - if (strcmp(opt->lname, "pgdatabase") == 0) - { - value = getenv("PGDATABASE"); - if (value == NULL) - value = getenv("PGUSER"); - if (value == NULL) - value = get_username(); - } - - if (value) - assign_option(opt, value, SOURCE_ENV); - } -} - -int -pgut_getopt(int argc, char **argv, pgut_option options[]) -{ - int c; - int optindex = 0; - char *optstring; - pgut_option *opt; - struct option *longopts; - size_t len; - - len = option_length(options); - longopts = pgut_newarray(struct option, len + 1 /* zero/end option */); - option_copy(longopts, options, len); - - optstring = longopts_to_optstring(longopts, len); - - /* Assign named options */ - while ((c = getopt_long(argc, argv, optstring, longopts, &optindex)) != -1) - { - opt = option_find(c, options); - if (opt && opt->allowed < SOURCE_CMDLINE) - elog(ERROR, "option %s cannot be specified in command line", - opt->lname); - /* Check 'opt == NULL' is performed in assign_option() */ - assign_option(opt, optarg, SOURCE_CMDLINE); - } - - init_cancel_handler(); - atexit(on_cleanup); - - return optind; -} - -/* compare two strings ignore cases and ignore -_ */ -static bool -key_equals(const char *lhs, const char *rhs) -{ - for (; *lhs && *rhs; lhs++, rhs++) - { - if (strchr("-_ ", *lhs)) - { - if (!strchr("-_ ", *rhs)) - return false; - } - else if (ToLower(*lhs) != ToLower(*rhs)) - return false; - } - - return *lhs == '\0' && *rhs == '\0'; -} - -/* - * Get configuration from configuration file. 
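/*
 * Illustrative only: how an LSN string such as "16/B374D848" maps onto the
 * 64-bit XLogRecPtr that parse_lsn() above builds (high word, a slash, low
 * word). The sample value is arbitrary.
 */
#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
	const char *value = "16/B374D848";
	unsigned int xlogid;
	unsigned int xrecoff;
	uint64_t	lsn;

	if (sscanf(value, "%X/%X", &xlogid, &xrecoff) != 2)
		return 1;

	lsn = ((uint64_t) xlogid << 32) | xrecoff;
	printf("%s -> %" PRIX64 "\n", value, lsn);	/* 16/B374D848 -> 16B374D848 */
	return 0;
}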
- * Return number of parsed options - */ -int -pgut_readopt(const char *path, pgut_option options[], int elevel) -{ - FILE *fp; - char buf[1024]; - char key[1024]; - char value[1024]; - int parsed_options = 0; - - if (!options) - return parsed_options; - - if ((fp = pgut_fopen(path, "rt", true)) == NULL) - return parsed_options; - - while (fgets(buf, lengthof(buf), fp)) - { - size_t i; - - for (i = strlen(buf); i > 0 && IsSpace(buf[i - 1]); i--) - buf[i - 1] = '\0'; - - if (parse_pair(buf, key, value)) - { - for (i = 0; options[i].type; i++) - { - pgut_option *opt = &options[i]; - - if (key_equals(key, opt->lname)) - { - if (opt->allowed < SOURCE_FILE && - opt->allowed != SOURCE_FILE_STRICT) - elog(elevel, "option %s cannot be specified in file", opt->lname); - else if (opt->source <= SOURCE_FILE) - { - assign_option(opt, value, SOURCE_FILE); - parsed_options++; - } - break; - } - } - if (!options[i].type) - elog(elevel, "invalid option \"%s\" in file \"%s\"", key, path); - } - } - - fclose(fp); - - return parsed_options; -} - -static const char * -skip_space(const char *str, const char *line) -{ - while (IsSpace(*str)) { str++; } - return str; -} - -static const char * -get_next_token(const char *src, char *dst, const char *line) -{ - const char *s; - int i; - int j; - - if ((s = skip_space(src, line)) == NULL) - return NULL; - - /* parse quoted string */ - if (*s == '\'') - { - s++; - for (i = 0, j = 0; s[i] != '\0'; i++) - { - if (s[i] == '\\') - { - i++; - switch (s[i]) - { - case 'b': - dst[j] = '\b'; - break; - case 'f': - dst[j] = '\f'; - break; - case 'n': - dst[j] = '\n'; - break; - case 'r': - dst[j] = '\r'; - break; - case 't': - dst[j] = '\t'; - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - { - int k; - long octVal = 0; - - for (k = 0; - s[i + k] >= '0' && s[i + k] <= '7' && k < 3; - k++) - octVal = (octVal << 3) + (s[i + k] - '0'); - i += k - 1; - dst[j] = ((char) octVal); - } - break; - default: - dst[j] = s[i]; - break; - } - } - else if (s[i] == '\'') - { - i++; - /* doubled quote becomes just one quote */ - if (s[i] == '\'') - dst[j] = s[i]; - else - break; - } - else - dst[j] = s[i]; - j++; - } - } - else - { - i = j = strcspn(s, "#\n\r\t\v"); - memcpy(dst, s, j); - } - - dst[j] = '\0'; - return s + i; -} - -static bool -parse_pair(const char buffer[], char key[], char value[]) -{ - const char *start; - const char *end; - - key[0] = value[0] = '\0'; - /* - * parse key - */ - start = buffer; - if ((start = skip_space(start, buffer)) == NULL) - return false; +#include "getopt_long.h" +#include "libpq-fe.h" +#include "libpq/pqsignal.h" +#include "pqexpbuffer.h" - end = start + strcspn(start, "=# \n\r\t\v"); +#include - /* skip blank buffer */ - if (end - start <= 0) - { - if (*start == '=') - elog(ERROR, "syntax error in \"%s\"", buffer); - return false; - } +#include "pgut.h" +#include "logger.h" +#include "file.h" - /* key found */ - strncpy(key, start, end - start); - key[end - start] = '\0'; - /* find key and value split char */ - if ((start = skip_space(end, buffer)) == NULL) - return false; +static char *password = NULL; +bool prompt_password = true; +bool force_password = false; - if (*start != '=') - { - elog(ERROR, "syntax error in \"%s\"", buffer); - return false; - } +/* Database connections */ +static PGcancel *volatile cancel_conn = NULL; - start++; +/* Interrupted by SIGINT (Ctrl+C) ? 
*/ +bool interrupted = false; +bool in_cleanup = false; +bool in_password = false; - /* - * parse value - */ - if ((end = get_next_token(start, value, buffer)) == NULL) - return false; +/* Connection routines */ +static void init_cancel_handler(void); +static void on_before_exec(PGconn *conn, PGcancel *thread_cancel_conn); +static void on_after_exec(PGcancel *thread_cancel_conn); +static void on_interrupt(void); +static void on_cleanup(void); +static pqsigfunc oldhandler = NULL; - if ((start = skip_space(end, buffer)) == NULL) - return false; +static char ** pgut_pgfnames(const char *path, bool strict); +static void pgut_pgfnames_cleanup(char **filenames); - if (*start != '\0' && *start != '#') - { - elog(ERROR, "syntax error in \"%s\"", buffer); - return false; - } +void discard_response(PGconn *conn); - return true; +void +pgut_init(void) +{ + init_cancel_handler(); + atexit(on_cleanup); } /* @@ -1461,8 +190,10 @@ pgut_get_conninfo_string(PGconn *conn) (option->val != NULL && option->val[0] == '\0')) continue; - /* do not print password into the file */ - if (strcmp(option->keyword, "password") == 0) + /* do not print password, passfile and options into the file */ + if (strcmp(option->keyword, "password") == 0 || + strcmp(option->keyword, "passfile") == 0 || + strcmp(option->keyword, "options") == 0) continue; if (!firstkeyword) @@ -1480,15 +211,12 @@ pgut_get_conninfo_string(PGconn *conn) return connstr; } +/* TODO: it is better to use PQconnectdbParams like in psql + * It will allow to set application_name for pg_probackup + */ PGconn * -pgut_connect(const char *dbname) -{ - return pgut_connect_extended(host, port, dbname, username); -} - -PGconn * -pgut_connect_extended(const char *pghost, const char *pgport, - const char *dbname, const char *login) +pgut_connect(const char *host, const char *port, + const char *dbname, const char *username) { PGconn *conn; @@ -1499,21 +227,24 @@ pgut_connect_extended(const char *pghost, const char *pgport, elog(ERROR, "You cannot specify --password and --no-password options together"); if (!password && force_password) - prompt_for_password(login); + prompt_for_password(username); /* Start the connection. Loop until we have a password if requested by backend. 
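/*
 * Minimal sketch, not part of the patch, of the retry loop described in the
 * comment above: attempt PQsetdbLogin() without a password first and prompt
 * only if the server reports PQconnectionNeedsPassword(). The connection
 * parameters are placeholders and read_password() is a hypothetical stand-in
 * for prompt_for_password().
 */
#include <stdio.h>
#include <string.h>
#include <libpq-fe.h>

static char password_buf[100];

static const char *
read_password(void)
{
	fprintf(stderr, "Password: ");
	if (fgets(password_buf, sizeof(password_buf), stdin) == NULL)
		return NULL;
	password_buf[strcspn(password_buf, "\n")] = '\0';
	return password_buf;
}

int
main(void)
{
	const char *password = NULL;
	PGconn	   *conn;

	for (;;)
	{
		conn = PQsetdbLogin("localhost", "5432", NULL, NULL,
							"postgres", "backup", password);

		if (PQstatus(conn) == CONNECTION_OK)
			break;

		if (PQconnectionNeedsPassword(conn) && password == NULL)
		{
			PQfinish(conn);		/* retry once, this time with a password */
			password = read_password();
			continue;
		}

		fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
		PQfinish(conn);
		return 1;
	}

	printf("connected to database %s\n", PQdb(conn));
	PQfinish(conn);
	return 0;
}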
*/ for (;;) { - conn = PQsetdbLogin(pghost, pgport, NULL, NULL, - dbname, login, password); + conn = PQsetdbLogin(host, port, NULL, NULL, + dbname, username, password); if (PQstatus(conn) == CONNECTION_OK) + { + pgut_atexit_push(pgut_disconnect_callback, conn); return conn; + } if (conn && PQconnectionNeedsPassword(conn) && prompt_password) { PQfinish(conn); - prompt_for_password(login); + prompt_for_password(username); if (interrupted) elog(ERROR, "interrupted"); @@ -1532,14 +263,8 @@ pgut_connect_extended(const char *pghost, const char *pgport, } PGconn * -pgut_connect_replication(const char *dbname) -{ - return pgut_connect_replication_extended(host, port, dbname, username); -} - -PGconn * -pgut_connect_replication_extended(const char *pghost, const char *pgport, - const char *dbname, const char *pguser) +pgut_connect_replication(const char *host, const char *port, + const char *dbname, const char *username) { PGconn *tmpconn; int argcount = 7; /* dbname, replication, fallback_app_name, @@ -1555,7 +280,7 @@ pgut_connect_replication_extended(const char *pghost, const char *pgport, elog(ERROR, "You cannot specify --password and --no-password options together"); if (!password && force_password) - prompt_for_password(pguser); + prompt_for_password(username); i = 0; @@ -1573,22 +298,22 @@ pgut_connect_replication_extended(const char *pghost, const char *pgport, values[i] = PROGRAM_NAME; i++; - if (pghost) + if (host) { keywords[i] = "host"; - values[i] = pghost; + values[i] = host; i++; } - if (pguser) + if (username) { keywords[i] = "user"; - values[i] = pguser; + values[i] = username; i++; } - if (pgport) + if (port) { keywords[i] = "port"; - values[i] = pgport; + values[i] = port; i++; } @@ -1619,7 +344,7 @@ pgut_connect_replication_extended(const char *pghost, const char *pgport, if (tmpconn && PQconnectionNeedsPassword(tmpconn) && prompt_password) { PQfinish(tmpconn); - prompt_for_password(pguser); + prompt_for_password(username); keywords[i] = "password"; values[i] = password; continue; @@ -1640,39 +365,15 @@ pgut_disconnect(PGconn *conn) { if (conn) PQfinish(conn); -} - -/* set/get host and port for connecting standby server */ -const char * -pgut_get_host() -{ - return host; -} - -const char * -pgut_get_port() -{ - return port; -} - -void -pgut_set_host(const char *new_host) -{ - host = new_host; -} - -void -pgut_set_port(const char *new_port) -{ - port = new_port; + pgut_atexit_pop(pgut_disconnect_callback, conn); } PGresult * -pgut_execute_parallel(PGconn* conn, - PGcancel* thread_cancel_conn, const char *query, +pgut_execute_parallel(PGconn* conn, + PGcancel* thread_cancel_conn, const char *query, int nParams, const char **params, - bool text_result) + bool text_result, bool ok_error, bool async) { PGresult *res; @@ -1680,7 +381,8 @@ pgut_execute_parallel(PGconn* conn, elog(ERROR, "interrupted"); /* write query to elog if verbose */ - if (log_level_console <= VERBOSE || log_level_file <= VERBOSE) + if (logger_config.log_level_console <= VERBOSE || + logger_config.log_level_file <= VERBOSE) { int i; @@ -1699,15 +401,56 @@ pgut_execute_parallel(PGconn* conn, } //on_before_exec(conn, thread_cancel_conn); - if (nParams == 0) - res = PQexec(conn, query); + if (async) + { + /* clean any old data */ + discard_response(conn); + + if (nParams == 0) + PQsendQuery(conn, query); + else + PQsendQueryParams(conn, query, nParams, NULL, params, NULL, NULL, + /* + * Specify zero to obtain results in text format, + * or one to obtain results in binary format. + */ + (text_result) ? 
0 : 1); + + /* wait for processing, TODO: timeout */ + for (;;) + { + if (interrupted) + { + pgut_cancel(conn); + pgut_disconnect(conn); + elog(ERROR, "interrupted"); + } + + if (!PQconsumeInput(conn)) + elog(ERROR, "query failed: %s query was: %s", + PQerrorMessage(conn), query); + + /* query is no done */ + if (!PQisBusy(conn)) + break; + + usleep(10000); + } + + res = PQgetResult(conn); + } else - res = PQexecParams(conn, query, nParams, NULL, params, NULL, NULL, - /* - * Specify zero to obtain results in text format, - * or one to obtain results in binary format. - */ - (text_result) ? 0 : 1); + { + if (nParams == 0) + res = PQexec(conn, query); + else + res = PQexecParams(conn, query, nParams, NULL, params, NULL, NULL, + /* + * Specify zero to obtain results in text format, + * or one to obtain results in binary format. + */ + (text_result) ? 0 : 1); + } //on_after_exec(thread_cancel_conn); switch (PQresultStatus(res)) @@ -1717,6 +460,9 @@ pgut_execute_parallel(PGconn* conn, case PGRES_COPY_IN: break; default: + if (ok_error && PQresultStatus(res) == PGRES_FATAL_ERROR) + break; + elog(ERROR, "query failed: %squery was: %s", PQerrorMessage(conn), query); break; @@ -1742,7 +488,8 @@ pgut_execute_extended(PGconn* conn, const char *query, int nParams, elog(ERROR, "interrupted"); /* write query to elog if verbose */ - if (log_level_console <= VERBOSE || log_level_file <= VERBOSE) + if (logger_config.log_level_console <= VERBOSE || + logger_config.log_level_file <= VERBOSE) { int i; @@ -1800,7 +547,8 @@ pgut_send(PGconn* conn, const char *query, int nParams, const char **params, int elog(ERROR, "interrupted"); /* write query to elog if verbose */ - if (log_level_console <= VERBOSE || log_level_file <= VERBOSE) + if (logger_config.log_level_console <= VERBOSE || + logger_config.log_level_file <= VERBOSE) { int i; @@ -1931,7 +679,7 @@ on_before_exec(PGconn *conn, PGcancel *thread_cancel_conn) //elog(WARNING, "Handle tread_cancel_conn. on_before_exec"); old = thread_cancel_conn; - /* be sure handle_sigint doesn't use pointer while freeing */ + /* be sure handle_interrupt doesn't use pointer while freeing */ thread_cancel_conn = NULL; if (old != NULL) @@ -1944,7 +692,7 @@ on_before_exec(PGconn *conn, PGcancel *thread_cancel_conn) /* Free the old one if we have one */ old = cancel_conn; - /* be sure handle_sigint doesn't use pointer while freeing */ + /* be sure handle_interrupt doesn't use pointer while freeing */ cancel_conn = NULL; if (old != NULL) @@ -1980,7 +728,7 @@ on_after_exec(PGcancel *thread_cancel_conn) //elog(WARNING, "Handle tread_cancel_conn. on_after_exec"); old = thread_cancel_conn; - /* be sure handle_sigint doesn't use pointer while freeing */ + /* be sure handle_interrupt doesn't use pointer while freeing */ thread_cancel_conn = NULL; if (old != NULL) @@ -1990,7 +738,7 @@ on_after_exec(PGcancel *thread_cancel_conn) { old = cancel_conn; - /* be sure handle_sigint doesn't use pointer while freeing */ + /* be sure handle_interrupt doesn't use pointer while freeing */ cancel_conn = NULL; if (old != NULL) @@ -2010,10 +758,13 @@ on_interrupt(void) int save_errno = errno; char errbuf[256]; - /* Set interruped flag */ + /* Set interrupted flag */ interrupted = true; - /* User promts password, call on_cleanup() byhand */ + /* + * User prompts password, call on_cleanup() byhand. Unless we do that we will + * get stuck forever until a user enters a password. 
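/*
 * Stand-alone sketch, not part of the patch, of the libpq polling pattern the
 * new async branch of pgut_execute_parallel() uses: send the query, pump
 * PQconsumeInput() until PQisBusy() clears, then fetch the result with
 * PQgetResult(). The connection string and query are placeholders.
 */
#include <stdio.h>
#include <unistd.h>
#include <libpq-fe.h>

int
main(void)
{
	PGconn	   *conn = PQconnectdb("dbname=postgres");	/* placeholder conninfo */
	PGresult   *res;

	if (PQstatus(conn) != CONNECTION_OK)
	{
		fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
		PQfinish(conn);
		return 1;
	}

	if (!PQsendQuery(conn, "SELECT pg_is_in_recovery()"))
	{
		fprintf(stderr, "send failed: %s", PQerrorMessage(conn));
		PQfinish(conn);
		return 1;
	}

	/* poll instead of blocking, so an interrupt flag could be checked here */
	while (PQisBusy(conn))
	{
		if (!PQconsumeInput(conn))
		{
			fprintf(stderr, "query failed: %s", PQerrorMessage(conn));
			PQfinish(conn);
			return 1;
		}
		usleep(10000);
	}

	res = PQgetResult(conn);
	if (PQresultStatus(res) == PGRES_TUPLES_OK)
		printf("in recovery: %s\n", PQgetvalue(res, 0, 0));
	PQclear(res);

	/* drain remaining results before the connection is reused */
	while ((res = PQgetResult(conn)) != NULL)
		PQclear(res);

	PQfinish(conn);
	return 0;
}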
+ */ if (in_password) { on_cleanup(); @@ -2042,6 +793,14 @@ struct pgut_atexit_item static pgut_atexit_item *pgut_atexit_stack = NULL; +void +pgut_disconnect_callback(bool fatal, void *userdata) +{ + PGconn *conn = (PGconn *) userdata; + if (conn) + pgut_disconnect(conn); +} + void pgut_atexit_push(pgut_atexit_callback callback, void *userdata) { @@ -2080,9 +839,11 @@ static void call_atexit_callbacks(bool fatal) { pgut_atexit_item *item; - - for (item = pgut_atexit_stack; item; item = item->next) + pgut_atexit_item *next; + for (item = pgut_atexit_stack; item; item = next){ + next = item->next; item->callback(fatal, item->userdata); + } } static void @@ -2093,108 +854,6 @@ on_cleanup(void) call_atexit_callbacks(false); } -static void -exit_or_abort(int exitcode) -{ - if (in_cleanup) - { - /* oops, error in cleanup*/ - call_atexit_callbacks(true); - abort(); - } - else - { - /* normal exit */ - exit(exitcode); - } -} - -/* - * Returns the current user name. - */ -static const char * -get_username(void) -{ - const char *ret; - -#ifndef WIN32 - struct passwd *pw; - - pw = getpwuid(geteuid()); - ret = (pw ? pw->pw_name : NULL); -#else - static char username[128]; /* remains after function exit */ - DWORD len = sizeof(username) - 1; - - if (GetUserName(username, &len)) - ret = username; - else - { - _dosmaperr(GetLastError()); - ret = NULL; - } -#endif - - if (ret == NULL) - elog(ERROR, "%s: could not get current user name: %s", - PROGRAM_NAME, strerror(errno)); - return ret; -} - -int -appendStringInfoFile(StringInfo str, FILE *fp) -{ - AssertArg(str != NULL); - AssertArg(fp != NULL); - - for (;;) - { - int rc; - - if (str->maxlen - str->len < 2 && enlargeStringInfo(str, 1024) == 0) - return errno = ENOMEM; - - rc = fread(str->data + str->len, 1, str->maxlen - str->len - 1, fp); - if (rc == 0) - break; - else if (rc > 0) - { - str->len += rc; - str->data[str->len] = '\0'; - } - else if (ferror(fp) && errno != EINTR) - return errno; - } - return 0; -} - -int -appendStringInfoFd(StringInfo str, int fd) -{ - AssertArg(str != NULL); - AssertArg(fd != -1); - - for (;;) - { - int rc; - - if (str->maxlen - str->len < 2 && enlargeStringInfo(str, 1024) == 0) - return errno = ENOMEM; - - rc = read(fd, str->data + str->len, str->maxlen - str->len - 1); - if (rc == 0) - break; - else if (rc > 0) - { - str->len += rc; - str->data[str->len] = '\0'; - } - else if (errno != EINTR) - return errno; - } - return 0; -} - void * pgut_malloc(size_t size) { @@ -2231,42 +890,12 @@ pgut_strdup(const char *str) return ret; } -char * -strdup_with_len(const char *str, size_t len) -{ - char *r; - - if (str == NULL) - return NULL; - - r = pgut_malloc(len + 1); - memcpy(r, str, len); - r[len] = '\0'; - return r; -} - -/* strdup but trim whitespaces at head and tail */ -char * -strdup_trim(const char *str) -{ - size_t len; - - if (str == NULL) - return NULL; - - while (IsSpace(str[0])) { str++; } - len = strlen(str); - while (len > 0 && IsSpace(str[len - 1])) { len--; } - - return strdup_with_len(str, len); -} - FILE * pgut_fopen(const char *path, const char *mode, bool missing_ok) { FILE *fp; - if ((fp = fopen(path, mode)) == NULL) + if ((fp = fio_open_stream(path, FIO_BACKUP_HOST)) == NULL) { if (missing_ok && errno == ENOENT) return NULL; @@ -2315,15 +944,18 @@ wait_for_sockets(int nfds, fd_set *fds, struct timeval *timeout) #ifndef WIN32 static void -handle_sigint(SIGNAL_ARGS) +handle_interrupt(SIGNAL_ARGS) { on_interrupt(); } +/* Handle various inrerruptions in the same way */ static void init_cancel_handler(void) { - 
oldhandler = pqsignal(SIGINT, handle_sigint); + oldhandler = pqsignal(SIGINT, handle_interrupt); + pqsignal(SIGQUIT, handle_interrupt); + pqsignal(SIGTERM, handle_interrupt); } #else /* WIN32 */ @@ -2420,3 +1052,153 @@ select_win32(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, con } #endif /* WIN32 */ + +void +discard_response(PGconn *conn) +{ + PGresult *res; + + do + { + res = PQgetResult(conn); + if (res) + PQclear(res); + } while (res); +} + +/* + * pgfnames + * + * return a list of the names of objects in the argument directory. Caller + * must call pgfnames_cleanup later to free the memory allocated by this + * function. + */ +char ** +pgut_pgfnames(const char *path, bool strict) +{ + DIR *dir; + struct dirent *file; + char **filenames; + int numnames = 0; + int fnsize = 200; /* enough for many small dbs */ + + dir = opendir(path); + if (dir == NULL) + { + elog(strict ? ERROR : WARNING, "could not open directory \"%s\": %m", path); + return NULL; + } + + filenames = (char **) palloc(fnsize * sizeof(char *)); + + while (errno = 0, (file = readdir(dir)) != NULL) + { + if (strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0) + { + if (numnames + 1 >= fnsize) + { + fnsize *= 2; + filenames = (char **) repalloc(filenames, + fnsize * sizeof(char *)); + } + filenames[numnames++] = pstrdup(file->d_name); + } + } + + if (errno) + { + elog(strict ? ERROR : WARNING, "could not read directory \"%s\": %m", path); + return NULL; + } + + filenames[numnames] = NULL; + + if (closedir(dir)) + { + elog(strict ? ERROR : WARNING, "could not close directory \"%s\": %m", path); + return NULL; + } + + return filenames; +} + +/* + * pgfnames_cleanup + * + * deallocate memory used for filenames + */ +void +pgut_pgfnames_cleanup(char **filenames) +{ + char **fn; + + for (fn = filenames; *fn; fn++) + pfree(*fn); + + pfree(filenames); +} + +/* Shamelessly stolen from commom/rmtree.c */ +bool +pgut_rmtree(const char *path, bool rmtopdir, bool strict) +{ + bool result = true; + char pathbuf[MAXPGPATH]; + char **filenames; + char **filename; + struct stat statbuf; + + /* + * we copy all the names out of the directory before we start modifying + * it. + */ + filenames = pgut_pgfnames(path, strict); + + if (filenames == NULL) + return false; + + /* now we have the names we can start removing things */ + for (filename = filenames; *filename; filename++) + { + snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename); + + if (lstat(pathbuf, &statbuf) != 0) + { + elog(strict ? ERROR : WARNING, "could not stat file or directory \"%s\": %m", pathbuf); + result = false; + break; + } + + if (S_ISDIR(statbuf.st_mode)) + { + /* call ourselves recursively for a directory */ + if (!pgut_rmtree(pathbuf, true, strict)) + { + result = false; + break; + } + } + else + { + if (unlink(pathbuf) != 0) + { + elog(strict ? ERROR : WARNING, "could not remove file or directory \"%s\": %m", pathbuf); + result = false; + break; + } + } + } + + if (rmtopdir) + { + if (rmdir(path) != 0) + { + elog(strict ? 
ERROR : WARNING, "could not remove file or directory \"%s\": %m", path); + result = false; + } + } + + pgut_pgfnames_cleanup(filenames); + + return result; +} diff --git a/src/utils/pgut.h b/src/utils/pgut.h index 0947fb7f2..d196aad3d 100644 --- a/src/utils/pgut.h +++ b/src/utils/pgut.h @@ -3,7 +3,7 @@ * pgut.h * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2017-2017, Postgres Professional + * Portions Copyright (c) 2017-2019, Postgres Professional * *------------------------------------------------------------------------- */ @@ -11,98 +11,16 @@ #ifndef PGUT_H #define PGUT_H +#include "postgres_fe.h" #include "libpq-fe.h" -#include "pqexpbuffer.h" -#include -#include - -#include "access/xlogdefs.h" -#include "logger.h" - -#if !defined(C_H) && !defined(__cplusplus) -#ifndef bool -typedef char bool; -#endif -#ifndef true -#define true ((bool) 1) -#endif -#ifndef false -#define false ((bool) 0) -#endif -#endif - -#define INFINITE_STR "INFINITE" - -typedef enum pgut_optsrc -{ - SOURCE_DEFAULT, - SOURCE_FILE_STRICT, - SOURCE_ENV, - SOURCE_FILE, - SOURCE_CMDLINE, - SOURCE_CONST -} pgut_optsrc; - -/* - * type: - * b: bool (true) - * B: bool (false) - * f: pgut_optfn - * i: 32bit signed integer - * u: 32bit unsigned integer - * I: 64bit signed integer - * U: 64bit unsigned integer - * s: string - * t: time_t - */ -typedef struct pgut_option -{ - char type; - uint8 sname; /* short name */ - const char *lname; /* long name */ - void *var; /* pointer to variable */ - pgut_optsrc allowed; /* allowed source */ - pgut_optsrc source; /* actual source */ - int flags; /* option unit */ -} pgut_option; - -typedef void (*pgut_optfn) (pgut_option *opt, const char *arg); typedef void (*pgut_atexit_callback)(bool fatal, void *userdata); -/* - * bit values in "flags" of an option - */ -#define OPTION_UNIT_KB 0x1000 /* value is in kilobytes */ -#define OPTION_UNIT_BLOCKS 0x2000 /* value is in blocks */ -#define OPTION_UNIT_XBLOCKS 0x3000 /* value is in xlog blocks */ -#define OPTION_UNIT_XSEGS 0x4000 /* value is in xlog segments */ -#define OPTION_UNIT_MEMORY 0xF000 /* mask for size-related units */ - -#define OPTION_UNIT_MS 0x10000 /* value is in milliseconds */ -#define OPTION_UNIT_S 0x20000 /* value is in seconds */ -#define OPTION_UNIT_MIN 0x30000 /* value is in minutes */ -#define OPTION_UNIT_TIME 0xF0000 /* mask for time-related units */ - -#define OPTION_UNIT (OPTION_UNIT_MEMORY | OPTION_UNIT_TIME) - -/* - * pgut client variables and functions - */ -extern const char *PROGRAM_NAME; -extern const char *PROGRAM_VERSION; -extern const char *PROGRAM_URL; -extern const char *PROGRAM_EMAIL; - extern void pgut_help(bool details); /* * pgut framework variables and functions */ -extern const char *pgut_dbname; -extern const char *host; -extern const char *port; -extern const char *username; extern bool prompt_password; extern bool force_password; @@ -110,47 +28,39 @@ extern bool interrupted; extern bool in_cleanup; extern bool in_password; /* User prompts password */ -extern int pgut_getopt(int argc, char **argv, pgut_option options[]); -extern int pgut_readopt(const char *path, pgut_option options[], int elevel); -extern void pgut_getopt_env(pgut_option options[]); extern void pgut_atexit_push(pgut_atexit_callback callback, void *userdata); extern void pgut_atexit_pop(pgut_atexit_callback callback, void *userdata); +extern void pgut_init(void); + /* * Database connections */ extern char *pgut_get_conninfo_string(PGconn *conn); -extern PGconn 
*pgut_connect(const char *dbname); -extern PGconn *pgut_connect_extended(const char *pghost, const char *pgport, - const char *dbname, const char *login); -extern PGconn *pgut_connect_replication(const char *dbname); -extern PGconn *pgut_connect_replication_extended(const char *pghost, const char *pgport, - const char *dbname, const char *pguser); +extern PGconn *pgut_connect(const char *host, const char *port, + const char *dbname, const char *username); +extern PGconn *pgut_connect_replication(const char *host, const char *port, + const char *dbname, + const char *username); extern void pgut_disconnect(PGconn *conn); +extern void pgut_disconnect_callback(bool fatal, void *userdata); extern PGresult *pgut_execute(PGconn* conn, const char *query, int nParams, const char **params); extern PGresult *pgut_execute_extended(PGconn* conn, const char *query, int nParams, const char **params, bool text_result, bool ok_error); -extern PGresult *pgut_execute_parallel(PGconn* conn, PGcancel* thread_cancel_conn, +extern PGresult *pgut_execute_parallel(PGconn* conn, PGcancel* thread_cancel_conn, const char *query, int nParams, - const char **params, bool text_result); + const char **params, bool text_result, bool ok_error, bool async); extern bool pgut_send(PGconn* conn, const char *query, int nParams, const char **params, int elevel); extern void pgut_cancel(PGconn* conn); extern int pgut_wait(int num, PGconn *connections[], struct timeval *timeout); -extern const char *pgut_get_host(void); -extern const char *pgut_get_port(void); -extern void pgut_set_host(const char *new_host); -extern void pgut_set_port(const char *new_port); - /* * memory allocators */ extern void *pgut_malloc(size_t size); extern void *pgut_realloc(void *p, size_t size); extern char *pgut_strdup(const char *str); -extern char *strdup_with_len(const char *str, size_t len); -extern char *strdup_trim(const char *str); #define pgut_new(type) ((type *) pgut_malloc(sizeof(type))) #define pgut_newarray(type, n) ((type *) pgut_malloc(sizeof(type) * (n))) @@ -177,49 +87,9 @@ extern FILE *pgut_fopen(const char *path, const char *mode, bool missing_ok); #define AssertMacro(x) ((void) 0) #endif -/* - * StringInfo and string operations - */ -#define STRINGINFO_H - -#define StringInfoData PQExpBufferData -#define StringInfo PQExpBuffer -#define makeStringInfo createPQExpBuffer -#define initStringInfo initPQExpBuffer -#define freeStringInfo destroyPQExpBuffer -#define termStringInfo termPQExpBuffer -#define resetStringInfo resetPQExpBuffer -#define enlargeStringInfo enlargePQExpBuffer -#define printfStringInfo printfPQExpBuffer /* reset + append */ -#define appendStringInfo appendPQExpBuffer -#define appendStringInfoString appendPQExpBufferStr -#define appendStringInfoChar appendPQExpBufferChar -#define appendBinaryStringInfo appendBinaryPQExpBuffer - -extern int appendStringInfoFile(StringInfo str, FILE *fp); -extern int appendStringInfoFd(StringInfo str, int fd); - -extern bool parse_bool(const char *value, bool *result); -extern bool parse_bool_with_len(const char *value, size_t len, bool *result); -extern bool parse_int32(const char *value, int32 *result, int flags); -extern bool parse_uint32(const char *value, uint32 *result, int flags); -extern bool parse_int64(const char *value, int64 *result, int flags); -extern bool parse_uint64(const char *value, uint64 *result, int flags); -extern bool parse_time(const char *value, time_t *result, bool utc_default); -extern bool parse_int(const char *value, int *result, int flags, - const char 
**hintmsg); -extern bool parse_lsn(const char *value, XLogRecPtr *result); - -extern void convert_from_base_unit(int64 base_value, int base_unit, - int64 *value, const char **unit); -extern void convert_from_base_unit_u(uint64 base_value, int base_unit, - uint64 *value, const char **unit); - #define IsSpace(c) (isspace((unsigned char)(c))) #define IsAlpha(c) (isalpha((unsigned char)(c))) #define IsAlnum(c) (isalnum((unsigned char)(c))) -#define IsIdentHead(c) (IsAlpha(c) || (c) == '_') -#define IsIdentBody(c) (IsAlnum(c) || (c) == '_') #define ToLower(c) (tolower((unsigned char)(c))) #define ToUpper(c) (toupper((unsigned char)(c))) diff --git a/src/utils/remote.c b/src/utils/remote.c new file mode 100644 index 000000000..f590a82b4 --- /dev/null +++ b/src/utils/remote.c @@ -0,0 +1,250 @@ +#include +#include +#include +#include +#include +#include + +#ifdef WIN32 +#define __thread __declspec(thread) +#else +#include +#endif + +#include "pg_probackup.h" +#include "file.h" + +#define MAX_CMDLINE_LENGTH 4096 +#define MAX_CMDLINE_OPTIONS 256 +#define ERR_BUF_SIZE 4096 +#define PIPE_SIZE (64*1024) + +static int split_options(int argc, char* argv[], int max_options, char* options) +{ + char* opt = options; + char in_quote = '\0'; + while (true) { + switch (*opt) { + case '\'': + case '\"': + if (!in_quote) { + in_quote = *opt++; + continue; + } + if (*opt == in_quote && *++opt != in_quote) { + in_quote = '\0'; + continue; + } + break; + case '\0': + if (opt != options) { + argv[argc++] = options; + if (argc >= max_options) + elog(ERROR, "Too much options"); + } + return argc; + case ' ': + argv[argc++] = options; + if (argc >= max_options) + elog(ERROR, "Too much options"); + *opt++ = '\0'; + options = opt; + continue; + default: + break; + } + opt += 1; + } + return argc; +} + +static __thread int child_pid; + +#if 0 +static void kill_child(void) +{ + kill(child_pid, SIGTERM); +} +#endif + + +void wait_ssh(void) +{ +/* + * We need to wait termination of SSH process to eliminate zombies. + * There is no waitpid() function at Windows but there are no zombie processes caused by lack of wait/waitpid. + * So just disable waitpid for Windows. 
+ */ +#ifndef WIN32 + int status; + waitpid(child_pid, &status, 0); + elog(LOG, "SSH process %d is terminated with status %d", child_pid, status); +#endif +} + +#ifdef WIN32 +void launch_ssh(char* argv[]) +{ + int infd = atoi(argv[2]); + int outfd = atoi(argv[3]); + + SYS_CHECK(close(STDIN_FILENO)); + SYS_CHECK(close(STDOUT_FILENO)); + + SYS_CHECK(dup2(infd, STDIN_FILENO)); + SYS_CHECK(dup2(outfd, STDOUT_FILENO)); + + SYS_CHECK(execvp(argv[4], argv+4)); +} +#endif + +static bool needs_quotes(char const* path) +{ + return strchr(path, ' ') != NULL; +} + +bool launch_agent(void) +{ + char cmd[MAX_CMDLINE_LENGTH]; + char* ssh_argv[MAX_CMDLINE_OPTIONS]; + int ssh_argc; + int outfd[2]; + int infd[2]; + int errfd[2]; + int agent_version; + + ssh_argc = 0; +#ifdef WIN32 + ssh_argv[ssh_argc++] = PROGRAM_NAME_FULL; + ssh_argv[ssh_argc++] = "ssh"; + ssh_argc += 2; /* reserve space for pipe descriptors */ +#endif + ssh_argv[ssh_argc++] = instance_config.remote.proto; + if (instance_config.remote.port != NULL) { + ssh_argv[ssh_argc++] = "-p"; + ssh_argv[ssh_argc++] = instance_config.remote.port; + } + if (instance_config.remote.user != NULL) { + ssh_argv[ssh_argc++] = "-l"; + ssh_argv[ssh_argc++] = instance_config.remote.user; + } + if (instance_config.remote.ssh_config != NULL) { + ssh_argv[ssh_argc++] = "-F"; + ssh_argv[ssh_argc++] = instance_config.remote.ssh_config; + } + if (instance_config.remote.ssh_options != NULL) { + ssh_argc = split_options(ssh_argc, ssh_argv, MAX_CMDLINE_OPTIONS, pg_strdup(instance_config.remote.ssh_options)); + } + + ssh_argv[ssh_argc++] = "-o"; + ssh_argv[ssh_argc++] = "PasswordAuthentication=no"; + + ssh_argv[ssh_argc++] = "-o"; + ssh_argv[ssh_argc++] = "Compression=no"; + + ssh_argv[ssh_argc++] = "-o"; + ssh_argv[ssh_argc++] = "LogLevel=error"; + + ssh_argv[ssh_argc++] = instance_config.remote.host; + ssh_argv[ssh_argc++] = cmd; + ssh_argv[ssh_argc] = NULL; + + if (instance_config.remote.path) + { + char const* probackup = PROGRAM_NAME_FULL; + char* sep = strrchr(probackup, '/'); + if (sep != NULL) { + probackup = sep + 1; + } +#ifdef WIN32 + else { + sep = strrchr(probackup, '\\'); + if (sep != NULL) { + probackup = sep + 1; + } + } + if (needs_quotes(instance_config.remote.path) || needs_quotes(PROGRAM_NAME_FULL)) + snprintf(cmd, sizeof(cmd), "\"%s\\%s\" agent", + instance_config.remote.path, probackup); + else + snprintf(cmd, sizeof(cmd), "%s\\%s agent", + instance_config.remote.path, probackup); +#else + if (needs_quotes(instance_config.remote.path) || needs_quotes(PROGRAM_NAME_FULL)) + snprintf(cmd, sizeof(cmd), "\"%s/%s\" agent", + instance_config.remote.path, probackup); + else + snprintf(cmd, sizeof(cmd), "%s/%s agent", + instance_config.remote.path, probackup); +#endif + } else { + if (needs_quotes(PROGRAM_NAME_FULL)) + snprintf(cmd, sizeof(cmd), "\"%s\" agent", PROGRAM_NAME_FULL); + else + snprintf(cmd, sizeof(cmd), "%s agent", PROGRAM_NAME_FULL); + } + +#ifdef WIN32 + SYS_CHECK(_pipe(infd, PIPE_SIZE, _O_BINARY)) ; + SYS_CHECK(_pipe(outfd, PIPE_SIZE, _O_BINARY)); + ssh_argv[2] = psprintf("%d", outfd[0]); + ssh_argv[3] = psprintf("%d", infd[1]); + { + intptr_t pid = _spawnvp(_P_NOWAIT, ssh_argv[0], ssh_argv); + if (pid < 0) + return false; + child_pid = GetProcessId((HANDLE)pid); +#else + SYS_CHECK(pipe(infd)); + SYS_CHECK(pipe(outfd)); + SYS_CHECK(pipe(errfd)); + + SYS_CHECK(child_pid = fork()); + + if (child_pid == 0) { /* child */ + SYS_CHECK(close(STDIN_FILENO)); + SYS_CHECK(close(STDOUT_FILENO)); + SYS_CHECK(close(STDERR_FILENO)); + + 
SYS_CHECK(dup2(outfd[0], STDIN_FILENO)); + SYS_CHECK(dup2(infd[1], STDOUT_FILENO)); + SYS_CHECK(dup2(errfd[1], STDERR_FILENO)); + + SYS_CHECK(close(infd[0])); + SYS_CHECK(close(infd[1])); + SYS_CHECK(close(outfd[0])); + SYS_CHECK(close(outfd[1])); + SYS_CHECK(close(errfd[0])); + SYS_CHECK(close(errfd[1])); + + if (execvp(ssh_argv[0], ssh_argv) < 0) + return false; + } else { +#endif + elog(LOG, "Start SSH client process, pid %d", child_pid); + SYS_CHECK(close(infd[1])); /* These are being used by the child */ + SYS_CHECK(close(outfd[0])); + SYS_CHECK(close(errfd[1])); + /*atexit(kill_child);*/ + + fio_redirect(infd[0], outfd[1], errfd[0]); /* write to stdout */ + } + + /* Make sure that remote agent has the same version + * TODO: we must also check PG version and fork edition + */ + agent_version = fio_get_agent_version(); + if (agent_version != AGENT_PROTOCOL_VERSION) + { + char agent_version_str[1024]; + sprintf(agent_version_str, "%d.%d.%d", + agent_version / 10000, + (agent_version / 100) % 100, + agent_version % 100); + + elog(ERROR, "Remote agent version %s does not match local program version %s", + agent_version_str, PROGRAM_VERSION); + } + + return true; +} diff --git a/src/utils/remote.h b/src/utils/remote.h new file mode 100644 index 000000000..dc98644ab --- /dev/null +++ b/src/utils/remote.h @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * + * remote.h: - prototypes of remote functions. + * + * Copyright (c) 2017-2019, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#ifndef REMOTE_H +#define REMOTE_H + +typedef struct RemoteConfig +{ + char* proto; + char* host; + char* port; + char* path; + char* user; + char *ssh_config; + char *ssh_options; +} RemoteConfig; + +#endif diff --git a/src/utils/thread.c b/src/utils/thread.c index 82c237641..5ceee068d 100644 --- a/src/utils/thread.c +++ b/src/utils/thread.c @@ -2,15 +2,22 @@ * * thread.c: - multi-platform pthread implementations. * - * Copyright (c) 2018, Postgres Professional + * Copyright (c) 2018-2019, Postgres Professional * *------------------------------------------------------------------------- */ +#include "postgres_fe.h" + #include "thread.h" -pthread_t main_tid = 0; +bool thread_interrupted = false; +#ifdef WIN32 +DWORD main_tid = 0; +#else +pthread_t main_tid = 0; +#endif #ifdef WIN32 #include diff --git a/src/utils/thread.h b/src/utils/thread.h index 064605331..2eaa5fb45 100644 --- a/src/utils/thread.h +++ b/src/utils/thread.h @@ -2,7 +2,7 @@ * * thread.h: - multi-platform pthread implementations. * - * Copyright (c) 2018, Postgres Professional + * Copyright (c) 2018-2019, Postgres Professional * *------------------------------------------------------------------------- */ @@ -28,7 +28,13 @@ extern int pthread_join(pthread_t th, void **thread_return); #include #endif +#ifdef WIN32 +extern DWORD main_tid; +#else extern pthread_t main_tid; +#endif + +extern bool thread_interrupted; extern int pthread_lock(pthread_mutex_t *mp); diff --git a/src/validate.c b/src/validate.c index 404965c8c..d16d27677 100644 --- a/src/validate.c +++ b/src/validate.c @@ -3,7 +3,7 @@ * validate.c: validate backup files. 
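/*
 * Stand-alone sketch, not part of the patch, of the fork/pipe/dup2/exec
 * pattern used by launch_agent() above: wire the child's stdin and stdout to
 * pipes, exec a command, talk to it through the parent's pipe ends, then reap
 * it with waitpid() so no zombie is left behind. The command ("tr a-z A-Z")
 * is a placeholder for the remote "pg_probackup agent" invocation.
 */
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int
main(void)
{
	int			to_child[2];
	int			from_child[2];
	pid_t		pid;
	char		buf[64];
	ssize_t		n;

	if (pipe(to_child) < 0 || pipe(from_child) < 0)
		return 1;

	pid = fork();
	if (pid < 0)
		return 1;

	if (pid == 0)
	{
		/* child: become the "agent", reading stdin and writing stdout */
		dup2(to_child[0], STDIN_FILENO);
		dup2(from_child[1], STDOUT_FILENO);
		close(to_child[0]);
		close(to_child[1]);
		close(from_child[0]);
		close(from_child[1]);
		execlp("tr", "tr", "a-z", "A-Z", (char *) NULL);
		_exit(127);				/* exec failed */
	}

	/* parent keeps the opposite pipe ends, much like fio_redirect() */
	close(to_child[0]);
	close(from_child[1]);

	if (write(to_child[1], "hello agent\n", 12) < 0)
		return 1;
	close(to_child[1]);			/* EOF lets the child finish */

	n = read(from_child[0], buf, sizeof(buf) - 1);
	if (n > 0)
	{
		buf[n] = '\0';
		printf("child said: %s", buf);	/* HELLO AGENT */
	}
	close(from_child[0]);

	waitpid(pid, NULL, 0);		/* reap the child so it is not left a zombie */
	return 0;
}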
* * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * *------------------------------------------------------------------------- */ @@ -19,11 +19,20 @@ static void *pgBackupValidateFiles(void *arg); static void do_validate_instance(void); static bool corrupted_backup_found = false; +static bool skipped_due_to_lock = false; typedef struct { - parray *files; + const char *base_path; + parray *files; bool corrupted; + XLogRecPtr stop_lsn; + uint32 checksum_version; + uint32 backup_version; + BackupMode backup_mode; + parray *dbOid_exclude_list; + const char *external_prefix; + HeaderMap *hdr_map; /* * Return value from the thread. @@ -34,24 +43,49 @@ typedef struct /* * Validate backup files. + * TODO: partial validation. */ void -pgBackupValidate(pgBackup *backup) +pgBackupValidate(pgBackup *backup, pgRestoreParams *params) { char base_path[MAXPGPATH]; - char path[MAXPGPATH]; - parray *files; + char external_prefix[MAXPGPATH]; + parray *files = NULL; bool corrupted = false; bool validation_isok = true; /* arrays with meta info for multi threaded validate */ pthread_t *threads; validate_files_arg *threads_args; int i; +// parray *dbOid_exclude_list = NULL; + + /* Check backup program version */ + if (parse_program_version(backup->program_version) > parse_program_version(PROGRAM_VERSION)) + elog(ERROR, "pg_probackup binary version is %s, but backup %s version is %s. " + "pg_probackup do not guarantee to be forward compatible. " + "Please upgrade pg_probackup binary.", + PROGRAM_VERSION, base36enc(backup->start_time), backup->program_version); + + /* Check backup server version */ + if (strcmp(backup->server_version, PG_MAJORVERSION) != 0) + elog(ERROR, "Backup %s has server version %s, but current pg_probackup binary " + "compiled with server version %s", + base36enc(backup->start_time), backup->server_version, PG_MAJORVERSION); + + if (backup->status == BACKUP_STATUS_RUNNING) + { + elog(WARNING, "Backup %s has status %s, change it to ERROR and skip validation", + base36enc(backup->start_time), status2str(backup->status)); + write_backup_status(backup, BACKUP_STATUS_ERROR, instance_name, true); + corrupted_backup_found = true; + return; + } /* Revalidation is attempted for DONE, ORPHAN and CORRUPT backups */ if (backup->status != BACKUP_STATUS_OK && backup->status != BACKUP_STATUS_DONE && backup->status != BACKUP_STATUS_ORPHAN && + backup->status != BACKUP_STATUS_MERGING && backup->status != BACKUP_STATUS_CORRUPT) { elog(WARNING, "Backup %s has status %s. 
Skip validation.", @@ -60,7 +94,17 @@ pgBackupValidate(pgBackup *backup) return; } - if (backup->status == BACKUP_STATUS_OK || backup->status == BACKUP_STATUS_DONE) + /* additional sanity */ + if (backup->backup_mode == BACKUP_MODE_FULL && + backup->status == BACKUP_STATUS_MERGING) + { + elog(WARNING, "Full backup %s has status %s, skip validation", + base36enc(backup->start_time), status2str(backup->status)); + return; + } + + if (backup->status == BACKUP_STATUS_OK || backup->status == BACKUP_STATUS_DONE || + backup->status == BACKUP_STATUS_MERGING) elog(INFO, "Validating backup %s", base36enc(backup->start_time)); else elog(INFO, "Revalidating backup %s", base36enc(backup->start_time)); @@ -71,9 +115,21 @@ pgBackupValidate(pgBackup *backup) backup->backup_mode != BACKUP_MODE_DIFF_DELTA) elog(WARNING, "Invalid backup_mode of backup %s", base36enc(backup->start_time)); - pgBackupGetPath(backup, base_path, lengthof(base_path), DATABASE_DIR); - pgBackupGetPath(backup, path, lengthof(path), DATABASE_FILE_LIST); - files = dir_read_file_list(base_path, path); + join_path_components(base_path, backup->root_dir, DATABASE_DIR); + join_path_components(external_prefix, backup->root_dir, EXTERNAL_DIR); + files = get_backup_filelist(backup, false); + + if (!files) + { + elog(WARNING, "Backup %s file list is corrupted", base36enc(backup->start_time)); + backup->status = BACKUP_STATUS_CORRUPT; + write_backup_status(backup, BACKUP_STATUS_CORRUPT, instance_name, true); + return; + } + +// if (params && params->partial_db_list) +// dbOid_exclude_list = get_dbOid_exclude_list(backup, files, params->partial_db_list, +// params->partial_restore_type); /* setup threads */ for (i = 0; i < parray_num(files); i++) @@ -88,12 +144,21 @@ pgBackupValidate(pgBackup *backup) palloc(sizeof(validate_files_arg) * num_threads); /* Validate files */ + thread_interrupted = false; for (i = 0; i < num_threads; i++) { validate_files_arg *arg = &(threads_args[i]); + arg->base_path = base_path; arg->files = files; arg->corrupted = false; + arg->backup_mode = backup->backup_mode; + arg->stop_lsn = backup->stop_lsn; + arg->checksum_version = backup->checksum_version; + arg->backup_version = parse_program_version(backup->program_version); + arg->external_prefix = external_prefix; + arg->hdr_map = &(backup->hdr_map); +// arg->dbOid_exclude_list = dbOid_exclude_list; /* By default there are some error */ threads_args[i].ret = 1; @@ -120,15 +185,41 @@ pgBackupValidate(pgBackup *backup) /* cleanup */ parray_walk(files, pgFileFree); parray_free(files); + cleanup_header_map(&(backup->hdr_map)); /* Update backup status */ - backup->status = corrupted ? BACKUP_STATUS_CORRUPT : BACKUP_STATUS_OK; - pgBackupWriteBackupControlFile(backup); + if (corrupted) + backup->status = BACKUP_STATUS_CORRUPT; + write_backup_status(backup, corrupted ? 
BACKUP_STATUS_CORRUPT : + BACKUP_STATUS_OK, instance_name, true); if (corrupted) elog(WARNING, "Backup %s data files are corrupted", base36enc(backup->start_time)); else elog(INFO, "Backup %s data files are valid", base36enc(backup->start_time)); + + /* Issue #132 kludge */ + if (!corrupted && + ((parse_program_version(backup->program_version) == 20104)|| + (parse_program_version(backup->program_version) == 20105)|| + (parse_program_version(backup->program_version) == 20201))) + { + char path[MAXPGPATH]; + + //pgBackupGetPath(backup, path, lengthof(path), DATABASE_FILE_LIST); + join_path_components(path, backup->root_dir, DATABASE_FILE_LIST); + + if (pgFileSize(path) >= (BLCKSZ*500)) + { + elog(WARNING, "Backup %s is a victim of metadata corruption. " + "Additional information can be found here: " + "https://github.com/postgrespro/pg_probackup/issues/132", + base36enc(backup->start_time)); + backup->status = BACKUP_STATUS_CORRUPT; + write_backup_status(backup, BACKUP_STATUS_CORRUPT, instance_name, true); + } + + } } /* @@ -142,47 +233,83 @@ pgBackupValidateFiles(void *arg) { int i; validate_files_arg *arguments = (validate_files_arg *)arg; + int num_files = parray_num(arguments->files); pg_crc32 crc; - for (i = 0; i < parray_num(arguments->files); i++) + for (i = 0; i < num_files; i++) { struct stat st; pgFile *file = (pgFile *) parray_get(arguments->files, i); + char file_fullpath[MAXPGPATH]; - if (!pg_atomic_test_set_flag(&file->lock)) - continue; - - if (interrupted) + if (interrupted || thread_interrupted) elog(ERROR, "Interrupted during validate"); /* Validate only regular files */ if (!S_ISREG(file->mode)) continue; + /* - * Skip files which has no data, because they - * haven't changed between backups. + * If in partial validate, check if the file belongs to the database + * we exclude. Only files from pgdata can be skipped. */ - if (file->write_size == BYTES_INVALID) + //if (arguments->dbOid_exclude_list && file->external_dir_num == 0 + // && parray_bsearch(arguments->dbOid_exclude_list, + // &file->dbOid, pgCompareOid)) + //{ + // elog(VERBOSE, "Skip file validation due to partial restore: \"%s\"", + // file->rel_path); + // continue; + //} + + if (!pg_atomic_test_set_flag(&file->lock)) continue; + if (progress) + elog(INFO, "Progress: (%d/%d). Validate file \"%s\"", + i + 1, num_files, file->rel_path); + /* - * Currently we don't compute checksums for - * cfs_compressed data files, so skip them. + * Skip files which has no data, because they + * haven't changed between backups. */ - if (file->is_cfs) + if (file->write_size == BYTES_INVALID) + { + /* TODO: lookup corresponding merge bug */ + if (arguments->backup_mode == BACKUP_MODE_FULL) + { + /* It is illegal for file in FULL backup to have BYTES_INVALID */ + elog(WARNING, "Backup file \"%s\" has invalid size. 
Possible metadata corruption.", + file->rel_path); + arguments->corrupted = true; + break; + } + else + continue; + } + + /* no point in trying to open empty file */ + if (file->write_size == 0) continue; - /* print progress */ - elog(VERBOSE, "Validate files: (%d/%lu) %s", - i + 1, (unsigned long) parray_num(arguments->files), file->path); + if (file->external_dir_num) + { + char temp[MAXPGPATH]; - if (stat(file->path, &st) == -1) + makeExternalDirPathByNum(temp, arguments->external_prefix, file->external_dir_num); + join_path_components(file_fullpath, temp, file->rel_path); + } + else + join_path_components(file_fullpath, arguments->base_path, file->rel_path); + + /* TODO: it is redundant to check file existence using stat */ + if (stat(file_fullpath, &st) == -1) { if (errno == ENOENT) - elog(WARNING, "Backup file \"%s\" is not found", file->path); + elog(WARNING, "Backup file \"%s\" is not found", file_fullpath); else elog(WARNING, "Cannot stat backup file \"%s\": %s", - file->path, strerror(errno)); + file_fullpath, strerror(errno)); arguments->corrupted = true; break; } @@ -190,18 +317,60 @@ pgBackupValidateFiles(void *arg) if (file->write_size != st.st_size) { elog(WARNING, "Invalid size of backup file \"%s\" : " INT64_FORMAT ". Expected %lu", - file->path, file->write_size, (unsigned long) st.st_size); + file_fullpath, (unsigned long) st.st_size, file->write_size); arguments->corrupted = true; break; } - crc = pgFileGetCRC(file->path); - if (crc != file->crc) + /* + * If option skip-block-validation is set, compute only file-level CRC for + * datafiles, otherwise check them block by block. + * Currently we don't compute checksums for + * cfs_compressed data files, so skip block validation for them. + */ + if (!file->is_datafile || skip_block_validation || file->is_cfs) { - elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. Expected %X", - file->path, file->crc, crc); - arguments->corrupted = true; - break; + /* + * Pre 2.0.22 we use CRC-32C, but in newer version of pg_probackup we + * use CRC-32. + * + * pg_control stores its content and checksum of the content, calculated + * using CRC-32C. If we calculate checksum of the whole pg_control using + * CRC-32C we get same checksum constantly. It might be because of the + * CRC-32C algorithm. + * To avoid this problem we need to use different algorithm, CRC-32 in + * this case. + * + * Starting from 2.0.25 we calculate crc of pg_control differently. + */ + if (arguments->backup_version >= 20025 && + strcmp(file->name, "pg_control") == 0 && + !file->external_dir_num) + crc = get_pgcontrol_checksum(arguments->base_path); + else + crc = pgFileGetCRC(file_fullpath, + arguments->backup_version <= 20021 || + arguments->backup_version >= 20025, + false); + if (crc != file->crc) + { + elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. 
Expected %X", + file_fullpath, crc, file->crc); + arguments->corrupted = true; + } + } + else + { + /* + * validate relation block by block + * check page headers, checksums (if enabled) + * and compute checksum of the file + */ + if (!validate_file_pages(file, file_fullpath, arguments->stop_lsn, + arguments->checksum_version, + arguments->backup_version, + arguments->hdr_map)) + arguments->corrupted = true; } } @@ -218,6 +387,9 @@ pgBackupValidateFiles(void *arg) int do_validate_all(void) { + corrupted_backup_found = false; + skipped_due_to_lock = false; + if (instance_name == NULL) { /* Show list of instances */ @@ -234,6 +406,7 @@ do_validate_all(void) errno = 0; while ((dent = readdir(dir))) { + char conf_path[MAXPGPATH]; char child[MAXPGPATH]; struct stat st; @@ -250,9 +423,23 @@ do_validate_all(void) if (!S_ISDIR(st.st_mode)) continue; + /* + * Initialize instance configuration. + */ instance_name = dent->d_name; - sprintf(backup_instance_path, "%s/%s/%s", backup_path, BACKUPS_DIR, instance_name); + sprintf(backup_instance_path, "%s/%s/%s", + backup_path, BACKUPS_DIR, instance_name); sprintf(arclog_path, "%s/%s/%s", backup_path, "wal", instance_name); + join_path_components(conf_path, backup_instance_path, + BACKUP_CATALOG_CONF_FILE); + if (config_read_opt(conf_path, instance_options, ERROR, false, + true) == 0) + { + elog(WARNING, "Configuration file \"%s\" is empty", conf_path); + corrupted_backup_found = true; + continue; + } + do_validate_instance(); } } @@ -261,12 +448,24 @@ do_validate_all(void) do_validate_instance(); } + /* TODO: Probably we should have different exit code for every condition + * and they combination: + * 0 - all backups are valid + * 1 - some backups are corrupt + * 2 - some backups where skipped due to concurrent locks + * 3 - some backups are corrupt and some are skipped due to concurrent locks + */ + + if (skipped_due_to_lock) + elog(WARNING, "Some backups weren't locked and they were skipped"); + if (corrupted_backup_found) { elog(WARNING, "Some backups are not valid"); return 1; } - else + + if (!skipped_due_to_lock && !corrupted_backup_found) elog(INFO, "All backups are valid"); return 0; @@ -278,72 +477,223 @@ do_validate_all(void) static void do_validate_instance(void) { - char *current_backup_id; int i; + int j; parray *backups; pgBackup *current_backup = NULL; elog(INFO, "Validate backups of the instance '%s'", instance_name); - /* Get exclusive lock of backup catalog */ - catalog_lock(); - /* Get list of all backups sorted in order of descending start time */ - backups = catalog_get_backup_list(INVALID_BACKUP_ID); + backups = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID); /* Examine backups one by one and validate them */ for (i = 0; i < parray_num(backups); i++) { - current_backup = (pgBackup *) parray_get(backups, i); + pgBackup *base_full_backup; - /* Valiate each backup along with its xlog files. 
*/ - pgBackupValidate(current_backup); + current_backup = (pgBackup *) parray_get(backups, i); - /* Ensure that the backup has valid list of parent backups */ - if (current_backup->status == BACKUP_STATUS_OK) + /* Find ancestor for incremental backup */ + if (current_backup->backup_mode != BACKUP_MODE_FULL) { - pgBackup *base_full_backup = current_backup; + pgBackup *tmp_backup = NULL; + int result; - if (current_backup->backup_mode != BACKUP_MODE_FULL) + result = scan_parent_chain(current_backup, &tmp_backup); + + /* chain is broken */ + if (result == ChainIsBroken) + { + char *parent_backup_id; + /* determine missing backup ID */ + + parent_backup_id = base36enc_dup(tmp_backup->parent_backup); + corrupted_backup_found = true; + + /* orphanize current_backup */ + if (current_backup->status == BACKUP_STATUS_OK || + current_backup->status == BACKUP_STATUS_DONE) + { + write_backup_status(current_backup, BACKUP_STATUS_ORPHAN, instance_name, true); + elog(WARNING, "Backup %s is orphaned because his parent %s is missing", + base36enc(current_backup->start_time), + parent_backup_id); + } + else + { + elog(WARNING, "Backup %s has missing parent %s", + base36enc(current_backup->start_time), parent_backup_id); + } + pg_free(parent_backup_id); + continue; + } + /* chain is whole, but at least one parent is invalid */ + else if (result == ChainIsInvalid) { - base_full_backup = find_parent_backup(current_backup); + /* Oldest corrupt backup has a chance for revalidation */ + if (current_backup->start_time != tmp_backup->start_time) + { + char *backup_id = base36enc_dup(tmp_backup->start_time); + /* orphanize current_backup */ + if (current_backup->status == BACKUP_STATUS_OK || + current_backup->status == BACKUP_STATUS_DONE) + { + write_backup_status(current_backup, BACKUP_STATUS_ORPHAN, instance_name, true); + elog(WARNING, "Backup %s is orphaned because his parent %s has status: %s", + base36enc(current_backup->start_time), backup_id, + status2str(tmp_backup->status)); + } + else + { + elog(WARNING, "Backup %s has parent %s with status: %s", + base36enc(current_backup->start_time), backup_id, + status2str(tmp_backup->status)); + } + pg_free(backup_id); + continue; + } + base_full_backup = find_parent_full_backup(current_backup); - if (base_full_backup == NULL) - elog(ERROR, "Valid full backup for backup %s is not found.", - base36enc(current_backup->start_time)); + /* sanity */ + if (!base_full_backup) + elog(ERROR, "Parent full backup for the given backup %s was not found", + base36enc(current_backup->start_time)); } + /* chain is whole, all parents are valid at first glance, + * current backup validation can proceed + */ + else + base_full_backup = tmp_backup; + } + else + base_full_backup = current_backup; - /* Validate corresponding WAL files */ - validate_wal(current_backup, arclog_path, 0, - 0, 0, base_full_backup->tli); + /* Do not interrupt, validate the next backup */ + if (!lock_backup(current_backup, true)) + { + elog(WARNING, "Cannot lock backup %s directory, skip validation", + base36enc(current_backup->start_time)); + skipped_due_to_lock = true; + continue; } + /* Valiate backup files*/ + pgBackupValidate(current_backup, NULL); + + /* Validate corresponding WAL files */ + if (current_backup->status == BACKUP_STATUS_OK) + validate_wal(current_backup, arclog_path, 0, + 0, 0, base_full_backup->tli, + instance_config.xlog_seg_size); - /* Mark every incremental backup between corrupted backup and nearest FULL backup as orphans */ - if (current_backup->status == BACKUP_STATUS_CORRUPT) + /* + * 
Mark every descendant of corrupted backup as orphan + */ + if (current_backup->status != BACKUP_STATUS_OK) { - int j; + char *current_backup_id; + /* This is ridiculous but legal. + * PAGE_b2 <- OK + * PAGE_a2 <- OK + * PAGE_b1 <- ORPHAN + * PAGE_a1 <- CORRUPT + * FULL <- OK + */ corrupted_backup_found = true; current_backup_id = base36enc_dup(current_backup->start_time); + for (j = i - 1; j >= 0; j--) { pgBackup *backup = (pgBackup *) parray_get(backups, j); - if (backup->backup_mode == BACKUP_MODE_FULL) - break; - if (backup->status != BACKUP_STATUS_OK) - continue; - else + if (is_parent(current_backup->start_time, backup, false)) { - backup->status = BACKUP_STATUS_ORPHAN; - pgBackupWriteBackupControlFile(backup); - - elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted", - base36enc(backup->start_time), current_backup_id); + if (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE) + { + write_backup_status(backup, BACKUP_STATUS_ORPHAN, instance_name, true); + + elog(WARNING, "Backup %s is orphaned because his parent %s has status: %s", + base36enc(backup->start_time), + current_backup_id, + status2str(current_backup->status)); + } } } free(current_backup_id); } + + /* For every OK backup we try to revalidate all his ORPHAN descendants. */ + if (current_backup->status == BACKUP_STATUS_OK) + { + /* revalidate all ORPHAN descendants + * be very careful not to miss a missing backup + * for every backup we must check that he is descendant of current_backup + */ + for (j = i - 1; j >= 0; j--) + { + pgBackup *backup = (pgBackup *) parray_get(backups, j); + pgBackup *tmp_backup = NULL; + int result; + + //PAGE_b2 ORPHAN + //PAGE_b1 ORPHAN ----- + //PAGE_a5 ORPHAN | + //PAGE_a4 CORRUPT | + //PAGE_a3 missing | + //PAGE_a2 missing | + //PAGE_a1 ORPHAN | + //PAGE OK <- we are here<-| + //FULL OK + + if (is_parent(current_backup->start_time, backup, false)) + { + /* Revalidation make sense only if parent chain is whole. + * is_parent() do not guarantee that. + */ + result = scan_parent_chain(backup, &tmp_backup); + + if (result == ChainIsInvalid) + { + /* revalidation make sense only if oldest invalid backup is current_backup + */ + + if (tmp_backup->start_time != backup->start_time) + continue; + + if (backup->status == BACKUP_STATUS_ORPHAN) + { + /* Do not interrupt, validate the next backup */ + if (!lock_backup(backup, true)) + { + elog(WARNING, "Cannot lock backup %s directory, skip validation", + base36enc(backup->start_time)); + skipped_due_to_lock = true; + continue; + } + /* Revalidate backup files*/ + pgBackupValidate(backup, NULL); + + if (backup->status == BACKUP_STATUS_OK) + { + + /* Revalidation successful, validate corresponding WAL files */ + validate_wal(backup, arclog_path, 0, + 0, 0, current_backup->tli, + instance_config.xlog_seg_size); + } + } + + if (backup->status != BACKUP_STATUS_OK) + { + corrupted_backup_found = true; + continue; + } + } + } + } + } } /* cleanup */ diff --git a/tests/Readme.md b/tests/Readme.md index 31dfb6560..c1dd9a63d 100644 --- a/tests/Readme.md +++ b/tests/Readme.md @@ -1,7 +1,11 @@ [см wiki](https://confluence.postgrespro.ru/display/DEV/pg_probackup) ``` -Note: For now there are tests only for Linix +Note: For now these are works on Linux and "kinda" works on Windows +``` + +``` +Windows Note: For tablespaces tests to work on Windows, you should explicitly(!) 
grant current user full access to tmp_dirs ``` @@ -13,12 +17,24 @@ Check physical correctness of restored instances: Check archive compression: export ARCHIVE_COMPRESSION=ON +Enable compatibility tests: + export PGPROBACKUPBIN_OLD=/path/to/previous_version_pg_probackup_binary + Specify path to pg_probackup binary file. By default tests use /pg_probackup/ export PGPROBACKUPBIN= +Remote backup depends on key authentication to local machine via ssh as current user. + export PGPROBACKUP_SSH_REMOTE=ON + +Run suit of basic simple tests: + export PG_PROBACKUP_TEST_BASIC=ON + +Run ptrack tests: + export PG_PROBACKUP_PTRACK=ON + + Usage: - pip install testgres - pip install psycopg2 + pip install testgres==1.8.2 export PG_CONFIG=/path/to/pg_config python -m unittest [-v] tests[.specific_module][.class.test] ``` diff --git a/tests/__init__.py b/tests/__init__.py index aeeabf2a9..dbf84feea 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,69 +1,65 @@ import unittest +import os -from . import init_test, option_test, show_test, \ - backup_test, delete_test, restore_test, validate_test, \ - retention_test, ptrack_clean, ptrack_cluster, \ - ptrack_move_to_tablespace, ptrack_recovery, ptrack_vacuum, \ - ptrack_vacuum_bits_frozen, ptrack_vacuum_bits_visibility, \ - ptrack_vacuum_full, ptrack_vacuum_truncate, pgpro560, pgpro589, \ - false_positive, replica, compression, page, ptrack, archive, \ - exclude, cfs_backup, cfs_restore, cfs_validate_backup, auth_test +from . import init, merge, option, show, compatibility, \ + backup, delete, delta, restore, validate, \ + retention, pgpro560, pgpro589, pgpro2068, false_positive, replica, \ + compression, page, ptrack, archive, exclude, cfs_backup, cfs_restore, \ + cfs_validate_backup, auth_test, time_stamp, snapfs, logging, \ + locking, remote, external, config, checkdb, set_backup, incr_restore def load_tests(loader, tests, pattern): suite = unittest.TestSuite() + + if 'PG_PROBACKUP_TEST_BASIC' in os.environ: + if os.environ['PG_PROBACKUP_TEST_BASIC'] == 'ON': + loader.testMethodPrefix = 'test_basic' + + if 'PG_PROBACKUP_PTRACK' in os.environ: + if os.environ['PG_PROBACKUP_PTRACK'] == 'ON': + suite.addTests(loader.loadTestsFromModule(ptrack)) + # suite.addTests(loader.loadTestsFromModule(auth_test)) suite.addTests(loader.loadTestsFromModule(archive)) - suite.addTests(loader.loadTestsFromModule(backup_test)) - suite.addTests(loader.loadTestsFromModule(cfs_backup)) + suite.addTests(loader.loadTestsFromModule(backup)) + suite.addTests(loader.loadTestsFromModule(compatibility)) + suite.addTests(loader.loadTestsFromModule(checkdb)) + suite.addTests(loader.loadTestsFromModule(config)) +# suite.addTests(loader.loadTestsFromModule(cfs_backup)) # suite.addTests(loader.loadTestsFromModule(cfs_restore)) # suite.addTests(loader.loadTestsFromModule(cfs_validate_backup)) -# suite.addTests(loader.loadTestsFromModule(logging)) suite.addTests(loader.loadTestsFromModule(compression)) - suite.addTests(loader.loadTestsFromModule(delete_test)) + suite.addTests(loader.loadTestsFromModule(delete)) + suite.addTests(loader.loadTestsFromModule(delta)) suite.addTests(loader.loadTestsFromModule(exclude)) + suite.addTests(loader.loadTestsFromModule(external)) suite.addTests(loader.loadTestsFromModule(false_positive)) - suite.addTests(loader.loadTestsFromModule(init_test)) - suite.addTests(loader.loadTestsFromModule(option_test)) + suite.addTests(loader.loadTestsFromModule(init)) + suite.addTests(loader.loadTestsFromModule(incr_restore)) + 
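    # Modules are appended in roughly alphabetical order; the ptrack module is
    # only loaded when PG_PROBACKUP_PTRACK=ON (see the guard at the top of
    # load_tests), and PG_PROBACKUP_TEST_BASIC=ON narrows collection to test
    # methods whose names start with 'test_basic'.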
suite.addTests(loader.loadTestsFromModule(locking)) + suite.addTests(loader.loadTestsFromModule(logging)) + suite.addTests(loader.loadTestsFromModule(merge)) + suite.addTests(loader.loadTestsFromModule(option)) suite.addTests(loader.loadTestsFromModule(page)) - suite.addTests(loader.loadTestsFromModule(ptrack)) - suite.addTests(loader.loadTestsFromModule(ptrack_clean)) - suite.addTests(loader.loadTestsFromModule(ptrack_cluster)) - suite.addTests(loader.loadTestsFromModule(ptrack_move_to_tablespace)) - suite.addTests(loader.loadTestsFromModule(ptrack_recovery)) - suite.addTests(loader.loadTestsFromModule(ptrack_vacuum)) - suite.addTests(loader.loadTestsFromModule(ptrack_vacuum_bits_frozen)) - suite.addTests(loader.loadTestsFromModule(ptrack_vacuum_bits_visibility)) - suite.addTests(loader.loadTestsFromModule(ptrack_vacuum_full)) - suite.addTests(loader.loadTestsFromModule(ptrack_vacuum_truncate)) - suite.addTests(loader.loadTestsFromModule(replica)) - suite.addTests(loader.loadTestsFromModule(restore_test)) - suite.addTests(loader.loadTestsFromModule(retention_test)) - suite.addTests(loader.loadTestsFromModule(show_test)) - suite.addTests(loader.loadTestsFromModule(validate_test)) suite.addTests(loader.loadTestsFromModule(pgpro560)) suite.addTests(loader.loadTestsFromModule(pgpro589)) + suite.addTests(loader.loadTestsFromModule(pgpro2068)) + suite.addTests(loader.loadTestsFromModule(remote)) + suite.addTests(loader.loadTestsFromModule(replica)) + suite.addTests(loader.loadTestsFromModule(restore)) + suite.addTests(loader.loadTestsFromModule(retention)) + suite.addTests(loader.loadTestsFromModule(set_backup)) + suite.addTests(loader.loadTestsFromModule(show)) + suite.addTests(loader.loadTestsFromModule(snapfs)) + suite.addTests(loader.loadTestsFromModule(time_stamp)) + suite.addTests(loader.loadTestsFromModule(validate)) return suite # test_pgpro434_2 unexpected success # ToDo: -# archive: -# discrepancy of instance`s SYSTEMID and node`s SYSTEMID should lead to archive-push refusal to work -# replica: -# backup should exit with correct error message if some master* option is missing -# --master* options shoukd not work when backuping master # logging: -# https://jira.postgrespro.ru/browse/PGPRO-584 # https://jira.postgrespro.ru/secure/attachment/20420/20420_doc_logging.md -# ptrack: -# ptrack backup on replica should work correctly # archive: # immediate recovery and full recovery -# backward compatibility: -# previous version catalog must be readable by newer version -# incremental chain from previous version can be continued -# backups from previous version can be restored -# 10vanilla_1.3ptrack + -# 10vanilla+ -# 9.6vanilla_1.3ptrack + diff --git a/tests/archive.py b/tests/archive.py index 8b8eb71aa..01ff5c062 100644 --- a/tests/archive.py +++ b/tests/archive.py @@ -1,10 +1,13 @@ import os +import shutil +import gzip import unittest -from .helpers.ptrack_helpers import ProbackupTest, ProbackupException, archive_script +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException, GdbException from datetime import datetime, timedelta import subprocess from sys import exit from time import sleep +from distutils.dir_util import copy_tree module_name = 'archive' @@ -19,13 +22,12 @@ def test_pgpro434_1(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 
'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s'} - ) + 'checkpoint_timeout': '30s'}) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) @@ -39,28 +41,24 @@ def test_pgpro434_1(self): result = node.safe_psql("postgres", "SELECT * FROM t_heap") self.backup_node( - backup_dir, 'node', node, - options=["--log-level-file=verbose"]) + backup_dir, 'node', node) node.cleanup() self.restore_node( backup_dir, 'node', node) node.slow_start() - # Recreate backup calagoue + # Recreate backup catalog + self.clean_pb(backup_dir) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) # Make backup - self.backup_node( - backup_dir, 'node', node, - options=["--log-level-file=verbose"]) + self.backup_node(backup_dir, 'node', node) node.cleanup() # Restore Database - self.restore_node( - backup_dir, 'node', node, - options=["--recovery-target-action=promote"]) + self.restore_node(backup_dir, 'node', node) node.slow_start() self.assertEqual( @@ -79,11 +77,10 @@ def test_pgpro434_2(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'max_wal_senders': '2', 'checkpoint_timeout': '30s'} ) self.init_pb(backup_dir) @@ -221,87 +218,158 @@ def test_pgpro434_2(self): # @unittest.skip("skip") def test_pgpro434_3(self): - """Check pg_stop_backup_timeout, needed backup_timeout""" + """ + Check pg_stop_backup_timeout, needed backup_timeout + Fixed in commit d84d79668b0c139 and assert fixed by ptrack 1.7 + """ fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s'} - ) + initdb_params=['--data-checksums']) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - archive_script_path = os.path.join(backup_dir, 'archive_script.sh') - with open(archive_script_path, 'w+') as f: - f.write( - archive_script.format( - backup_dir=backup_dir, node_name='node', count_limit=2)) + node.slow_start() + + gdb = self.backup_node( + backup_dir, 'node', node, + options=[ + "--archive-timeout=60", + "--log-level-file=LOG"], + gdb=True) + + gdb.set_breakpoint('pg_stop_backup') + gdb.run_until_break() + + self.set_auto_conf(node, {'archive_command': 'exit 1'}) + node.reload() + + gdb.continue_execution_until_exit() + + sleep(1) + + log_file = os.path.join(backup_dir, 'log', 'pg_probackup.log') + with open(log_file, 'r') as f: + log_content = f.read() + + # in PG =< 9.6 pg_stop_backup always wait + if self.get_version(node) < 100000: + self.assertIn( + "ERROR: pg_stop_backup doesn't answer in 60 seconds, cancel it", + log_content) + else: + self.assertIn( + "ERROR: WAL segment 000000010000000000000003 could not be archived in 60 seconds", + log_content) + + log_file = os.path.join(node.logs_dir, 'postgresql.log') + with open(log_file, 'r') as f: + log_content = f.read() + + self.assertNotIn( + 'FailedAssertion', + log_content, + 'PostgreSQL crashed 
because of a failed assert') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_pgpro434_4(self): + """ + Check pg_stop_backup_timeout, libpq-timeout requested. + Fixed in commit d84d79668b0c139 and assert fixed by ptrack 1.7 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) - st = os.stat(archive_script_path) - os.chmod(archive_script_path, st.st_mode | 0o111) - node.append_conf( - 'postgresql.auto.conf', "archive_command = '{0} %p %f'".format( - archive_script_path)) node.slow_start() - try: - self.backup_node( + + gdb = self.backup_node( backup_dir, 'node', node, options=[ "--archive-timeout=60", - "--log-level-file=verbose", - "--stream"] - ) - # we should die here because exception is what we expect to happen - self.assertEqual( - 1, 0, - "Expecting Error because pg_stop_backup failed to answer.\n " - "Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - "ERROR: pg_stop_backup doesn't answer" in e.message and - "cancel it" in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) + "--log-level-file=info"], + gdb=True) + + gdb.set_breakpoint('pg_stop_backup') + gdb.run_until_break() + + self.set_auto_conf(node, {'archive_command': "'exit 1'"}) + node.reload() + + os.environ["PGAPPNAME"] = "foo" + + pid = node.safe_psql( + "postgres", + "SELECT pid " + "FROM pg_stat_activity " + "WHERE application_name = 'pg_probackup'").rstrip() + + os.environ["PGAPPNAME"] = "pg_probackup" + + postgres_gdb = self.gdb_attach(pid) + postgres_gdb.set_breakpoint('do_pg_stop_backup') + postgres_gdb.continue_execution_until_running() + + gdb.continue_execution_until_exit() + # gdb._execute('detach') + + log_file = os.path.join(backup_dir, 'log', 'pg_probackup.log') + with open(log_file, 'r') as f: + log_content = f.read() + + self.assertIn( + "ERROR: pg_stop_backup doesn't answer in 60 seconds, cancel it", + log_content) log_file = os.path.join(node.logs_dir, 'postgresql.log') with open(log_file, 'r') as f: log_content = f.read() - self.assertNotIn( - 'FailedAssertion', - log_content, - 'PostgreSQL crashed because of a failed assert') + + self.assertNotIn( + 'FailedAssertion', + log_content, + 'PostgreSQL crashed because of a failed assert') # Clean after yourself self.del_test_dir(module_name, fname) # @unittest.skip("skip") - def test_arhive_push_file_exists(self): + def test_archive_push_file_exists(self): """Archive-push if file exists""" fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s'} - ) + 'checkpoint_timeout': '30s'}) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) wals_dir = os.path.join(backup_dir, 'wal', 'node') if self.archive_compress: - file = os.path.join(wals_dir, '000000010000000000000001.gz') + filename = 
'000000010000000000000001.gz' + file = os.path.join(wals_dir, filename) else: - file = os.path.join(wals_dir, '000000010000000000000001') + filename = '000000010000000000000001' + file = os.path.join(wals_dir, filename) with open(file, 'a') as f: f.write(b"blablablaadssaaaaaaaaaaaaaaa") @@ -316,52 +384,79 @@ def test_arhive_push_file_exists(self): "from generate_series(0,100500) i") log_file = os.path.join(node.logs_dir, 'postgresql.log') + self.switch_wal_segment(node) + sleep(1) + with open(log_file, 'r') as f: log_content = f.read() - self.assertTrue( - 'LOG: archive command failed with exit code 1' in log_content and - 'DETAIL: The failed archive command was:' in log_content and - 'INFO: pg_probackup archive-push from' in log_content and - 'ERROR: WAL segment "{0}" already exists.'.format(file) in log_content, - 'Expecting error messages about failed archive_command' - ) - self.assertFalse('pg_probackup archive-push completed successfully' in log_content) + self.assertIn( + 'LOG: archive command failed with exit code 1', + log_content) + + self.assertIn( + 'DETAIL: The failed archive command was:', + log_content) + + self.assertIn( + 'pg_probackup archive-push WAL file', + log_content) + + self.assertIn( + 'WAL file already exists in archive with different checksum', + log_content) + + self.assertNotIn( + 'pg_probackup archive-push completed successfully', log_content) + + if self.get_version(node) < 100000: + wal_src = os.path.join( + node.data_dir, 'pg_xlog', '000000010000000000000001') + else: + wal_src = os.path.join( + node.data_dir, 'pg_wal', '000000010000000000000001') + + if self.archive_compress: + with open(wal_src, 'rb') as f_in, gzip.open( + file, 'wb', compresslevel=1) as f_out: + shutil.copyfileobj(f_in, f_out) + else: + shutil.copyfile(wal_src, file) - os.remove(file) self.switch_wal_segment(node) sleep(5) with open(log_file, 'r') as f: log_content = f.read() - self.assertTrue( - 'pg_probackup archive-push completed successfully' in log_content, - 'Expecting messages about successfull execution archive_command') + + self.assertIn( + 'pg_probackup archive-push completed successfully', + log_content) # Clean after yourself self.del_test_dir(module_name, fname) # @unittest.skip("skip") - def test_arhive_push_file_exists_overwrite(self): + def test_archive_push_file_exists_overwrite(self): """Archive-push if file exists""" fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], - pg_options={ - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s'} - ) + pg_options={'checkpoint_timeout': '30s'}) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) wals_dir = os.path.join(backup_dir, 'wal', 'node') if self.archive_compress: - file = os.path.join(wals_dir, '000000010000000000000001.gz') + filename = '000000010000000000000001.gz' + file = os.path.join(wals_dir, filename) else: - file = os.path.join(wals_dir, '000000010000000000000001') + filename = '000000010000000000000001' + file = os.path.join(wals_dir, filename) with open(file, 'a') as f: f.write(b"blablablaadssaaaaaaaaaaaaaaa") @@ -376,21 +471,32 @@ def test_arhive_push_file_exists_overwrite(self): "from generate_series(0,100500) i") log_file = os.path.join(node.logs_dir, 'postgresql.log') + self.switch_wal_segment(node) + 
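        # Forcing a segment switch makes PostgreSQL hand the real
        # 000000010000000000000001 to archive_command; it no longer matches
        # the garbage file pre-created in the archive above, so archive-push
        # is expected to fail with a checksum mismatch (checked against
        # postgresql.log below).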
sleep(1) + with open(log_file, 'r') as f: log_content = f.read() - self.assertTrue( - 'LOG: archive command failed with exit code 1' in log_content and - 'DETAIL: The failed archive command was:' in log_content and - 'INFO: pg_probackup archive-push from' in log_content and - 'ERROR: WAL segment "{0}" already exists.'.format(file) in log_content, - 'Expecting error messages about failed archive_command' - ) - self.assertFalse('pg_probackup archive-push completed successfully' in log_content) + + self.assertIn( + 'LOG: archive command failed with exit code 1', log_content) + self.assertIn( + 'DETAIL: The failed archive command was:', log_content) + self.assertIn( + 'pg_probackup archive-push WAL file', log_content) + self.assertNotIn( + 'WAL file already exists in archive with ' + 'different checksum, overwriting', log_content) + self.assertIn( + 'WAL file already exists in archive with ' + 'different checksum', log_content) + + self.assertNotIn( + 'pg_probackup archive-push completed successfully', log_content) self.set_archiving(backup_dir, 'node', node, overwrite=True) node.reload() self.switch_wal_segment(node) - sleep(2) + sleep(5) with open(log_file, 'r') as f: log_content = f.read() @@ -398,6 +504,169 @@ def test_arhive_push_file_exists_overwrite(self): 'pg_probackup archive-push completed successfully' in log_content, 'Expecting messages about successfull execution archive_command') + self.assertIn( + 'WAL file already exists in archive with ' + 'different checksum, overwriting', log_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_archive_push_partial_file_exists(self): + """Archive-push if stale '.part' file exists""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving( + backup_dir, 'node', node, + log_level='verbose', archive_timeout=60) + + node.slow_start() + + # this backup is needed only for validation to xid + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t1(a int)") + + xid = node.safe_psql( + "postgres", + "INSERT INTO t1 VALUES (1) RETURNING (xmin)").rstrip() + + if self.get_version(node) < 100000: + filename_orig = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_xlogfile_name_offset(pg_current_xlog_location());").rstrip() + else: + filename_orig = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_walfile_name_offset(pg_current_wal_flush_lsn());").rstrip() + + # form up path to next .part WAL segment + wals_dir = os.path.join(backup_dir, 'wal', 'node') + if self.archive_compress: + filename = filename_orig + '.gz' + '.part' + file = os.path.join(wals_dir, filename) + else: + filename = filename_orig + '.part' + file = os.path.join(wals_dir, filename) + + # emulate stale .part file + with open(file, 'a') as f: + f.write(b"blahblah") + f.flush() + f.close() + + self.switch_wal_segment(node) + sleep(70) + + # check that segment is archived + if self.archive_compress: + filename_orig = filename_orig + '.gz' + + file = os.path.join(wals_dir, filename_orig) + self.assertTrue(os.path.isfile(file)) + + # successful validate means that archive-push reused stale wal segment + self.validate_pb( + backup_dir, 'node', + 
options=['--recovery-target-xid={0}'.format(xid)]) + + log_file = os.path.join(node.logs_dir, 'postgresql.log') + with open(log_file, 'r') as f: + log_content = f.read() + + self.assertIn( + 'Reusing stale temp WAL file', + log_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_archive_push_part_file_exists_not_stale(self): + """Archive-push if .part file exists and it is not stale""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node, archive_timeout=60) + + node.slow_start() + + node.safe_psql( + "postgres", + "create table t1()") + self.switch_wal_segment(node) + + node.safe_psql( + "postgres", + "create table t2()") + + if self.get_version(node) < 100000: + filename_orig = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_xlogfile_name_offset(pg_current_xlog_location());").rstrip() + else: + filename_orig = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_walfile_name_offset(pg_current_wal_flush_lsn());").rstrip() + + # form up path to next .part WAL segment + wals_dir = os.path.join(backup_dir, 'wal', 'node') + if self.archive_compress: + filename = filename_orig + '.gz' + '.part' + file = os.path.join(wals_dir, filename) + else: + filename = filename_orig + '.part' + file = os.path.join(wals_dir, filename) + + with open(file, 'a') as f: + f.write(b"blahblah") + f.flush() + f.close() + + self.switch_wal_segment(node) + sleep(30) + + with open(file, 'a') as f: + f.write(b"blahblahblahblah") + f.flush() + f.close() + + sleep(40) + + # check that segment is NOT archived + if self.archive_compress: + filename_orig = filename_orig + '.gz' + + file = os.path.join(wals_dir, filename_orig) + + self.assertFalse(os.path.isfile(file)) + + # log_file = os.path.join(node.logs_dir, 'postgresql.log') + # with open(log_file, 'r') as f: + # log_content = f.read() + # self.assertIn( + # 'is not stale', + # log_content) + # Clean after yourself self.del_test_dir(module_name, fname) @@ -412,28 +681,28 @@ def test_replica_archive(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') master = self.make_simple_node( - base_dir="{0}/{1}/master".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'master'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'max_wal_senders': '2', + 'archive_timeout': '10s', 'checkpoint_timeout': '30s', - 'max_wal_size': '1GB'} - ) + 'max_wal_size': '32MB'}) + self.init_pb(backup_dir) # ADD INSTANCE 'MASTER' self.add_instance(backup_dir, 'master', master) master.slow_start() replica = self.make_simple_node( - base_dir="{0}/{1}/replica".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'replica')) replica.cleanup() master.psql( "postgres", "create table t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,256) i") + "from generate_series(0,2560) i") self.backup_node(backup_dir, 'master', master, options=['--stream']) before = master.safe_psql("postgres", "SELECT * FROM t_heap") @@ -459,9 +728,6 @@ def test_replica_archive(self): "md5(repeat(i::text,10))::tsvector as tsvector " 
"from generate_series(256,512) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") - # ADD INSTANCE 'REPLICA' - - sleep(1) backup_id = self.backup_node( backup_dir, 'replica', replica, @@ -469,18 +735,20 @@ def test_replica_archive(self): '--archive-timeout=30', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)]) + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) # RESTORE FULL BACKUP TAKEN FROM replica node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'node')) node.cleanup() self.restore_node(backup_dir, 'replica', data_dir=node.data_dir) - node.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node.port)) + + self.set_auto_conf(node, {'port': node.port}) node.slow_start() # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") @@ -493,16 +761,22 @@ def test_replica_archive(self): "postgres", "insert into t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(512,768) i") + "from generate_series(512,80680) i") + before = master.safe_psql("postgres", "SELECT * FROM t_heap") + + self.wait_until_replica_catch_with_master(master, replica) + backup_id = self.backup_node( backup_dir, 'replica', replica, backup_type='page', options=[ - '--archive-timeout=30', '--log-level-file=verbose', - '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)] - ) + '--archive-timeout=60', + '--master-db=postgres', + '--master-host=localhost', + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) @@ -511,8 +785,9 @@ def test_replica_archive(self): node.cleanup() self.restore_node( backup_dir, 'replica', data_dir=node.data_dir, backup_id=backup_id) - node.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node.port)) + + self.set_auto_conf(node, {'port': node.port}) + node.slow_start() # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") @@ -533,14 +808,14 @@ def test_master_and_replica_parallel_archiving(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') master = self.make_simple_node( - base_dir="{0}/{1}/master".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'master'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'checkpoint_timeout': '30s'} + 'archive_timeout': '10s'} ) replica = self.make_simple_node( - base_dir="{0}/{1}/replica".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'replica')) replica.cleanup() self.init_pb(backup_dir) @@ -568,7 +843,7 @@ def test_master_and_replica_parallel_archiving(self): pgdata_replica = self.pgdata_content(replica.data_dir) self.compare_pgdata(pgdata_master, pgdata_replica) - self.set_replica(master, replica, synchronous=True) + self.set_replica(master, replica) # ADD INSTANCE REPLICA self.add_instance(backup_dir, 'replica', replica) # SET ARCHIVING FOR REPLICA @@ -579,16 +854,21 @@ def test_master_and_replica_parallel_archiving(self): after = replica.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) - # TAKE FULL ARCHIVE BACKUP FROM REPLICA + 
master.psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0, 60000) i") + backup_id = self.backup_node( backup_dir, 'replica', replica, options=[ - '--archive-timeout=20', - '--log-level-file=verbose', + '--archive-timeout=30', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)] - ) + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) @@ -604,24 +884,25 @@ def test_master_and_replica_parallel_archiving(self): # @unittest.expectedFailure # @unittest.skip("skip") - def test_master_and_replica_concurrent_archiving(self): + def test_basic_master_and_replica_concurrent_archiving(self): """ make node 'master 'with archiving, take archive backup and turn it into replica, - set replica with archiving, make archive backup from replica, - make archive backup from master + set replica with archiving, + make sure that archiving on both node is working. """ fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') master = self.make_simple_node( - base_dir="{0}/{1}/master".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'master'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'checkpoint_timeout': '30s'} - ) + 'checkpoint_timeout': '30s', + 'archive_timeout': '10s'}) + replica = self.make_simple_node( - base_dir="{0}/{1}/replica".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'replica')) replica.cleanup() self.init_pb(backup_dir) @@ -636,6 +917,8 @@ def test_master_and_replica_concurrent_archiving(self): "md5(repeat(i::text,10))::tsvector as tsvector " "from generate_series(0,10000) i") + master.pgbench_init(scale=5) + # TAKE FULL ARCHIVE BACKUP FROM MASTER self.backup_node(backup_dir, 'master', master) # GET LOGICAL CONTENT FROM MASTER @@ -650,11 +933,11 @@ def test_master_and_replica_concurrent_archiving(self): pgdata_replica = self.pgdata_content(replica.data_dir) self.compare_pgdata(pgdata_master, pgdata_replica) - self.set_replica(master, replica, synchronous=True) + self.set_replica(master, replica, synchronous=False) # ADD INSTANCE REPLICA # self.add_instance(backup_dir, 'replica', replica) # SET ARCHIVING FOR REPLICA - # self.set_archiving(backup_dir, 'replica', replica, replica=True) + self.set_archiving(backup_dir, 'master', replica, replica=True) replica.slow_start(replica=True) # CHECK LOGICAL CORRECTNESS on REPLICA @@ -668,13 +951,7 @@ def test_master_and_replica_concurrent_archiving(self): "from generate_series(0,10000) i") # TAKE FULL ARCHIVE BACKUP FROM REPLICA - backup_id = self.backup_node( - backup_dir, 'master', replica, - options=[ - '--archive-timeout=30', - '--master-host=localhost', - '--master-db=postgres', - '--master-port={0}'.format(master.port)]) + backup_id = self.backup_node(backup_dir, 'master', replica) self.validate_pb(backup_dir, 'master') self.assertEqual( @@ -686,8 +963,20 @@ def test_master_and_replica_concurrent_archiving(self): self.assertEqual( 'OK', self.show_pb(backup_dir, 'master', backup_id)['status']) + master.pgbench_init(scale=10) + + sleep(10) + + replica.promote() + + master.pgbench_init(scale=10) + replica.pgbench_init(scale=10) + + self.backup_node(backup_dir, 'master', master) + self.backup_node(backup_dir, 'master', replica) + # Clean after yourself - 
self.del_test_dir(module_name, fname) + self.del_test_dir(module_name, fname, nodes=[master, replica]) # @unittest.expectedFailure # @unittest.skip("skip") @@ -696,16 +985,15 @@ def test_archive_pg_receivexlog(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s'} - ) + 'checkpoint_timeout': '30s'}) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() if self.get_version(node) < 100000: pg_receivexlog_path = self.get_bin_path('pg_receivexlog') else: @@ -715,7 +1003,7 @@ def test_archive_pg_receivexlog(self): [ pg_receivexlog_path, '-p', str(node.port), '--synchronous', '-D', os.path.join(backup_dir, 'wal', 'node') - ], async=True) + ], asynchronous=True) if pg_receivexlog.returncode: self.assertFalse( @@ -770,18 +1058,17 @@ def test_archive_pg_receivexlog_compression_pg10(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'max_wal_senders': '2', 'checkpoint_timeout': '30s'} ) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() if self.get_version(node) < self.version_to_num('10.0'): - return unittest.skip('You need PostgreSQL 10 for this test') + return unittest.skip('You need PostgreSQL >= 10 for this test') else: pg_receivexlog_path = self.get_bin_path('pg_receivewal') @@ -789,7 +1076,7 @@ def test_archive_pg_receivexlog_compression_pg10(self): [ pg_receivexlog_path, '-p', str(node.port), '--synchronous', '-Z', '9', '-D', os.path.join(backup_dir, 'wal', 'node') - ], async=True) + ], asynchronous=True) if pg_receivexlog.returncode: self.assertFalse( @@ -831,3 +1118,1370 @@ def test_archive_pg_receivexlog_compression_pg10(self): # Clean after yourself pg_receivexlog.kill() self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_archive_catalog(self): + """ + ARCHIVE replica: + + t6 |----------------------- + t5 | |------- + | | + t4 | |-------------- + | | + t3 | |--B1--|/|--B2-|/|-B3--- + | | + t2 |--A1--------A2--- + t1 ---------Y1--Y2-- + + ARCHIVE master: + t1 -Z1--Z2--- + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'archive_timeout': '30s', + 'checkpoint_timeout': '30s', + 'autovacuum': 'off'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + self.set_archiving(backup_dir, 'master', master) + + master.slow_start() + + # FULL + master.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from 
generate_series(0,10000) i") + + self.backup_node(backup_dir, 'master', master) + + # PAGE + master.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(10000,20000) i") + + self.backup_node( + backup_dir, 'master', master, backup_type='page') + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + self.restore_node(backup_dir, 'master', replica) + self.set_replica(master, replica) + + self.add_instance(backup_dir, 'replica', replica) + self.set_archiving(backup_dir, 'replica', replica, replica=True) + + copy_tree( + os.path.join(backup_dir, 'wal', 'master'), + os.path.join(backup_dir, 'wal', 'replica')) + + replica.slow_start(replica=True) + + # FULL backup replica + Y1 = self.backup_node( + backup_dir, 'replica', replica, + options=['--stream', '--archive-timeout=60s']) + + master.pgbench_init(scale=5) + + # PAGE backup replica + Y2 = self.backup_node( + backup_dir, 'replica', replica, + backup_type='page', options=['--stream', '--archive-timeout=60s']) + + # create timeline t2 + replica.promote() + + # FULL backup replica + A1 = self.backup_node( + backup_dir, 'replica', replica) + + replica.pgbench_init(scale=5) + + replica.safe_psql( + 'postgres', + "CREATE TABLE t1 (a text)") + + target_xid = None + with replica.connect("postgres") as con: + res = con.execute( + "INSERT INTO t1 VALUES ('inserted') RETURNING (xmin)") + con.commit() + target_xid = res[0][0] + + # DELTA backup replica + A2 = self.backup_node( + backup_dir, 'replica', replica, backup_type='delta') + + # create timeline t3 + replica.cleanup() + self.restore_node( + backup_dir, 'replica', replica, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-timeline=2', + '--recovery-target-action=promote']) + + replica.slow_start() + + B1 = self.backup_node( + backup_dir, 'replica', replica) + + replica.pgbench_init(scale=2) + + B2 = self.backup_node( + backup_dir, 'replica', replica, backup_type='page') + + replica.pgbench_init(scale=2) + + target_xid = None + with replica.connect("postgres") as con: + res = con.execute( + "INSERT INTO t1 VALUES ('inserted') RETURNING (xmin)") + con.commit() + target_xid = res[0][0] + + B3 = self.backup_node( + backup_dir, 'replica', replica, backup_type='page') + + replica.pgbench_init(scale=2) + + # create timeline t4 + replica.cleanup() + self.restore_node( + backup_dir, 'replica', replica, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-timeline=3', + '--recovery-target-action=promote']) + + replica.slow_start() + + replica.safe_psql( + 'postgres', + 'CREATE TABLE ' + 't2 as select i, ' + 'repeat(md5(i::text),5006056) as fat_attr ' + 'from generate_series(0,6) i') + + target_xid = None + with replica.connect("postgres") as con: + res = con.execute( + "INSERT INTO t1 VALUES ('inserted') RETURNING (xmin)") + con.commit() + target_xid = res[0][0] + + replica.safe_psql( + 'postgres', + 'CREATE TABLE ' + 't3 as select i, ' + 'repeat(md5(i::text),5006056) as fat_attr ' + 'from generate_series(0,10) i') + + # create timeline t5 + replica.cleanup() + self.restore_node( + backup_dir, 'replica', replica, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-timeline=4', + '--recovery-target-action=promote']) + + replica.slow_start() + + replica.safe_psql( + 'postgres', + 'CREATE TABLE ' + 't4 as select i, ' + 'repeat(md5(i::text),5006056) as 
fat_attr ' + 'from generate_series(0,6) i') + + # create timeline t6 + replica.cleanup() + + self.restore_node( + backup_dir, 'replica', replica, backup_id=A1, + options=[ + '--recovery-target=immediate', + '--recovery-target-action=promote']) + replica.slow_start() + + replica.pgbench_init(scale=2) + + sleep(5) + + show = self.show_archive(backup_dir, as_text=True) + show = self.show_archive(backup_dir) + + for instance in show: + if instance['instance'] == 'replica': + replica_timelines = instance['timelines'] + + if instance['instance'] == 'master': + master_timelines = instance['timelines'] + + # check that all timelines are ok + for timeline in replica_timelines: + self.assertTrue(timeline['status'], 'OK') + + # check that all timelines are ok + for timeline in master_timelines: + self.assertTrue(timeline['status'], 'OK') + + # create holes in t3 + wals_dir = os.path.join(backup_dir, 'wal', 'replica') + wals = [ + f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) + and not f.endswith('.backup') and not f.endswith('.history') and f.startswith('00000003') + ] + wals.sort() + + # check that t3 is ok + self.show_archive(backup_dir) + + file = os.path.join(backup_dir, 'wal', 'replica', '000000030000000000000017') + if self.archive_compress: + file = file + '.gz' + os.remove(file) + + file = os.path.join(backup_dir, 'wal', 'replica', '000000030000000000000012') + if self.archive_compress: + file = file + '.gz' + os.remove(file) + + file = os.path.join(backup_dir, 'wal', 'replica', '000000030000000000000013') + if self.archive_compress: + file = file + '.gz' + os.remove(file) + + # check that t3 is not OK + show = self.show_archive(backup_dir) + + show = self.show_archive(backup_dir) + + for instance in show: + if instance['instance'] == 'replica': + replica_timelines = instance['timelines'] + + # sanity + for timeline in replica_timelines: + if timeline['tli'] == 1: + timeline_1 = timeline + continue + + if timeline['tli'] == 2: + timeline_2 = timeline + continue + + if timeline['tli'] == 3: + timeline_3 = timeline + continue + + if timeline['tli'] == 4: + timeline_4 = timeline + continue + + if timeline['tli'] == 5: + timeline_5 = timeline + continue + + if timeline['tli'] == 6: + timeline_6 = timeline + continue + + self.assertEqual(timeline_6['status'], "OK") + self.assertEqual(timeline_5['status'], "OK") + self.assertEqual(timeline_4['status'], "OK") + self.assertEqual(timeline_3['status'], "DEGRADED") + self.assertEqual(timeline_2['status'], "OK") + self.assertEqual(timeline_1['status'], "OK") + + self.assertEqual(len(timeline_3['lost-segments']), 2) + self.assertEqual( + timeline_3['lost-segments'][0]['begin-segno'], + '000000030000000000000012') + self.assertEqual( + timeline_3['lost-segments'][0]['end-segno'], + '000000030000000000000013') + self.assertEqual( + timeline_3['lost-segments'][1]['begin-segno'], + '000000030000000000000017') + self.assertEqual( + timeline_3['lost-segments'][1]['end-segno'], + '000000030000000000000017') + + self.assertEqual(len(timeline_6['backups']), 0) + self.assertEqual(len(timeline_5['backups']), 0) + self.assertEqual(len(timeline_4['backups']), 0) + self.assertEqual(len(timeline_3['backups']), 3) + self.assertEqual(len(timeline_2['backups']), 2) + self.assertEqual(len(timeline_1['backups']), 2) + + # check closest backup correctness + self.assertEqual(timeline_6['closest-backup-id'], A1) + self.assertEqual(timeline_5['closest-backup-id'], B2) + self.assertEqual(timeline_4['closest-backup-id'], B2) + 
self.assertEqual(timeline_3['closest-backup-id'], A1) + self.assertEqual(timeline_2['closest-backup-id'], Y2) + + # check parent tli correctness + self.assertEqual(timeline_6['parent-tli'], 2) + self.assertEqual(timeline_5['parent-tli'], 4) + self.assertEqual(timeline_4['parent-tli'], 3) + self.assertEqual(timeline_3['parent-tli'], 2) + self.assertEqual(timeline_2['parent-tli'], 1) + self.assertEqual(timeline_1['parent-tli'], 0) + + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_archive_catalog_1(self): + """ + double segment - compressed and not + """ + if not self.archive_compress: + return self.fail( + 'You need to enable ARCHIVE_COMPRESSION for this test to run') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'archive_timeout': '30s', + 'checkpoint_timeout': '30s', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node, compress=True) + + node.slow_start() + + # FULL + self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=2) + + wals_dir = os.path.join(backup_dir, 'wal', 'node') + original_file = os.path.join(wals_dir, '000000010000000000000001.gz') + tmp_file = os.path.join(wals_dir, '000000010000000000000001') + + with gzip.open(original_file, 'rb') as f_in, open(tmp_file, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + + os.rename( + os.path.join(wals_dir, '000000010000000000000001'), + os.path.join(wals_dir, '000000010000000000000002')) + + show = self.show_archive(backup_dir) + + for instance in show: + timelines = instance['timelines'] + + # sanity + for timeline in timelines: + self.assertEqual( + timeline['min-segno'], + '000000010000000000000001') + self.assertEqual(timeline['status'], 'OK') + + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_archive_catalog_2(self): + """ + double segment - compressed and not + """ + if not self.archive_compress: + return self.fail( + 'You need to enable ARCHIVE_COMPRESSION for this test to run') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'archive_timeout': '30s', + 'checkpoint_timeout': '30s', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node, compress=True) + + node.slow_start() + + # FULL + self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=2) + + wals_dir = os.path.join(backup_dir, 'wal', 'node') + original_file = os.path.join(wals_dir, '000000010000000000000001.gz') + tmp_file = os.path.join(wals_dir, '000000010000000000000001') + + with gzip.open(original_file, 'rb') as f_in, open(tmp_file, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + + os.rename( + os.path.join(wals_dir, '000000010000000000000001'), + os.path.join(wals_dir, '000000010000000000000002')) + + os.remove(original_file) + + show = self.show_archive(backup_dir) + + for instance in show: + timelines = instance['timelines'] + + # sanity + for timeline in timelines: + self.assertEqual( + 
timeline['min-segno'], + '000000010000000000000002') + self.assertEqual(timeline['status'], 'OK') + + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_archive_options(self): + """ + check that '--archive-host', '--archive-user', '--archiver-port' + and '--restore-command' are working as expected. + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node, compress=True) + + node.slow_start() + + # FULL + self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=1) + + node.cleanup() + + wal_dir = os.path.join(backup_dir, 'wal', 'node') + self.restore_node( + backup_dir, 'node', node, + options=[ + '--restore-command="cp {0}/%f %p"'.format(wal_dir), + '--archive-host=localhost', + '--archive-port=22', + '--archive-user={0}'.format(self.user) + ]) + + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_conf = os.path.join(node.data_dir, 'probackup_recovery.conf') + else: + recovery_conf = os.path.join(node.data_dir, 'recovery.conf') + + with open(recovery_conf, 'r') as f: + recovery_content = f.read() + + self.assertIn( + 'restore_command = \'"cp {0}/%f %p"\''.format(wal_dir), + recovery_content) + + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, + options=[ + '--archive-host=localhost', + '--archive-port=22', + '--archive-user={0}'.format(self.user)]) + + with open(recovery_conf, 'r') as f: + recovery_content = f.read() + + self.assertIn( + "restore_command = '{0} archive-get -B {1} --instance {2} " + "--wal-file-path=%p --wal-file-name=%f --remote-host=localhost " + "--remote-port=22 --remote-user={3}'".format( + self.probackup_path, backup_dir, 'node', self.user), + recovery_content) + + node.slow_start() + + node.safe_psql( + 'postgres', + 'select 1') + + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_archive_options_1(self): + """ + check that '--archive-host', '--archive-user', '--archiver-port' + and '--restore-command' are working as expected with set-config + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node, compress=True) + + node.slow_start() + + # FULL + self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=1) + + node.cleanup() + + wal_dir = os.path.join(backup_dir, 'wal', 'node') + self.set_config( + backup_dir, 'node', + options=[ + '--restore-command="cp {0}/%f %p"'.format(wal_dir), + '--archive-host=localhost', + '--archive-port=22', + '--archive-user={0}'.format(self.user)]) + self.restore_node(backup_dir, 'node', node) + + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_conf = os.path.join(node.data_dir, 'probackup_recovery.conf') + else: + recovery_conf = os.path.join(node.data_dir, 'recovery.conf') + + with open(recovery_conf, 'r') as f: + recovery_content = f.read() + + self.assertIn( + 'restore_command = \'"cp {0}/%f 
%p"\''.format(wal_dir), + recovery_content) + + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, + options=[ + '--restore-command=none'.format(wal_dir), + '--archive-host=localhost1', + '--archive-port=23', + '--archive-user={0}'.format(self.user) + ]) + + with open(recovery_conf, 'r') as f: + recovery_content = f.read() + + self.assertIn( + "restore_command = '{0} archive-get -B {1} --instance {2} " + "--wal-file-path=%p --wal-file-name=%f --remote-host=localhost1 " + "--remote-port=23 --remote-user={3}'".format( + self.probackup_path, backup_dir, 'node', self.user), + recovery_content) + + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_hexadecimal_timeline(self): + """ + Check that timelines are correct. + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node, log_level='verbose') + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=2) + + # create timelines + for i in range(1, 13): + # print(i) + node.cleanup() + self.restore_node( + backup_dir, 'node', node, + options=['--recovery-target-timeline={0}'.format(i)]) + node.slow_start() + node.pgbench_init(scale=2) + + sleep(5) + + show = self.show_archive(backup_dir) + + timelines = show[0]['timelines'] + + print(timelines[0]) + + tli13 = timelines[0] + + self.assertEqual( + 13, + tli13['tli']) + + self.assertEqual( + 12, + tli13['parent-tli']) + + self.assertEqual( + backup_id, + tli13['closest-backup-id']) + + self.assertEqual( + '0000000D000000000000001C', + tli13['max-segno']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + # @unittest.expectedFailure + def test_archiving_and_slots(self): + """ + Check that archiving don`t break slot + guarantee. 
+ """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'checkpoint_timeout': '30s', + 'max_wal_size': '64MB'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node, log_level='verbose') + node.slow_start() + + if self.get_version(node) < 100000: + pg_receivexlog_path = self.get_bin_path('pg_receivexlog') + else: + pg_receivexlog_path = self.get_bin_path('pg_receivewal') + + # "pg_receivewal --create-slot --slot archive_slot --if-not-exists " + # "&& pg_receivewal --synchronous -Z 1 /tmp/wal --slot archive_slot --no-loop" + + self.run_binary( + [ + pg_receivexlog_path, '-p', str(node.port), '--synchronous', + '--create-slot', '--slot', 'archive_slot', '--if-not-exists' + ]) + + node.pgbench_init(scale=10) + + pg_receivexlog = self.run_binary( + [ + pg_receivexlog_path, '-p', str(node.port), '--synchronous', + '-D', os.path.join(backup_dir, 'wal', 'node'), + '--no-loop', '--slot', 'archive_slot', + '-Z', '1' + ], asynchronous=True) + + if pg_receivexlog.returncode: + self.assertFalse( + True, + 'Failed to start pg_receivexlog: {0}'.format( + pg_receivexlog.communicate()[1])) + + sleep(2) + + pg_receivexlog.kill() + + backup_id = self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=20) + + exit(1) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_archive_push_sanity(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'archive_mode': 'on', + 'archive_command': 'exit 1'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + + node.slow_start() + + node.pgbench_init(scale=50) + node.stop() + + self.set_archiving(backup_dir, 'node', node) + os.remove(os.path.join(node.logs_dir, 'postgresql.log')) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f: + postgres_log_content = f.read() + + # print(postgres_log_content) + # make sure that .backup file is not compressed + self.assertNotIn('.backup.gz', postgres_log_content) + self.assertNotIn('WARNING', postgres_log_content) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node( + backup_dir, 'node', replica, + data_dir=replica.data_dir, options=['-R']) + + #self.set_archiving(backup_dir, 'replica', replica, replica=True) + self.set_auto_conf(replica, {'port': replica.port}) + self.set_auto_conf(replica, {'archive_mode': 'always'}) + self.set_auto_conf(replica, {'hot_standby': 'on'}) + replica.slow_start(replica=True) + + self.wait_until_replica_catch_with_master(node, replica) + + node.pgbench_init(scale=5) + + replica.promote() + replica.pgbench_init(scale=10) + + with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f: + replica_log_content = f.read() + + # make sure that .partial file is not compressed + self.assertNotIn('.partial.gz', replica_log_content) + # make sure that .history file is not compressed + self.assertNotIn('.history.gz', replica_log_content) + 
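# make sure that no warnings were emitted during archiving +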
self.assertNotIn('WARNING', replica_log_content) + + output = self.show_archive( + backup_dir, 'node', as_json=False, as_text=True, + options=['--log-level-console=VERBOSE']) + + self.assertNotIn('WARNING', output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_archive_pg_receivexlog_partial_handling(self): + """check that archive-get delivers .partial and .gz.partial files""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'archive_timeout': '10s'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + + node.slow_start() + + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node( + backup_dir, 'node', replica, replica.data_dir, options=['-R']) + self.set_auto_conf(replica, {'port': replica.port}) + self.set_replica(node, replica) + + self.add_instance(backup_dir, 'replica', replica) + # self.set_archiving(backup_dir, 'replica', replica, replica=True) + + replica.slow_start(replica=True) + + if self.get_version(replica) < 100000: + pg_receivexlog_path = self.get_bin_path('pg_receivexlog') + else: + pg_receivexlog_path = self.get_bin_path('pg_receivewal') + + cmdline = [ + pg_receivexlog_path, '-p', str(replica.port), '--synchronous', + '-D', os.path.join(backup_dir, 'wal', 'replica')] + + if self.archive_compress and node.major_version >= 10: + cmdline += ['-Z', '1'] + + pg_receivexlog = self.run_binary(cmdline, asynchronous=True) + + if pg_receivexlog.returncode: + self.assertFalse( + True, + 'Failed to start pg_receivexlog: {0}'.format( + pg_receivexlog.communicate()[1])) + + # FULL + self.backup_node(backup_dir, 'replica', replica, options=['--stream']) + + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1000000) i") + + # PAGE + self.backup_node( + backup_dir, 'replica', replica, backup_type='delta', options=['--stream']) + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(1000000,2000000) i") + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'replica', node_restored, + node_restored.data_dir, options=['--recovery-target=latest', '--recovery-target-action=promote']) + self.set_auto_conf(node_restored, {'port': node_restored.port}) + self.set_auto_conf(node_restored, {'hot_standby': 'off'}) + + # it will set node_restored as warm standby. 
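+# it is left commented out here so that node_restored starts up normally and can be queried below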
+# with open(os.path.join(node_restored.data_dir, "standby.signal"), 'w') as f: +# f.flush() +# f.close() + + node_restored.slow_start() + + result = node.safe_psql( + "postgres", + "select sum(id) from t_heap") + + result_new = node_restored.safe_psql( + "postgres", + "select sum(id) from t_heap") + + self.assertEqual(result, result_new) + + # Clean after yourself + pg_receivexlog.kill() + self.del_test_dir( + module_name, fname, [node, replica, node_restored]) + + @unittest.skip("skip") + def test_multi_timeline_recovery_prefetching(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + node.pgbench_init(scale=50) + + target_xid = node.safe_psql( + 'postgres', + 'select txid_current()').rstrip() + + node.pgbench_init(scale=20) + + node.stop() + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-action=promote']) + + node.slow_start() + + node.pgbench_init(scale=20) + + target_xid = node.safe_psql( + 'postgres', + 'select txid_current()').rstrip() + + node.stop(['-m', 'immediate', '-D', node.data_dir]) + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, + options=[ +# '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-timeline=2', +# '--recovery-target-action=promote', + '--no-validate']) + node.slow_start() + + node.pgbench_init(scale=20) + result = node.safe_psql( + 'postgres', + 'select * from pgbench_accounts') + node.stop() + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, + options=[ +# '--recovery-target-xid=100500', + '--recovery-target-timeline=3', +# '--recovery-target-action=promote', + '--no-validate']) + os.remove(os.path.join(node.logs_dir, 'postgresql.log')) + + restore_command = self.get_restore_command(backup_dir, 'node', node) + restore_command += ' -j 2 --batch-size=10 --log-level-console=VERBOSE' + + if node.major_version >= 12: + node.append_conf( + 'probackup_recovery.conf', "restore_command = '{0}'".format(restore_command)) + else: + node.append_conf( + 'recovery.conf', "restore_command = '{0}'".format(restore_command)) + + node.slow_start() + + result_new = node.safe_psql( + 'postgres', + 'select * from pgbench_accounts') + + self.assertEqual(result, result_new) + + with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f: + postgres_log_content = f.read() + + # check that requesting of non-existing segment do not + # throwns aways prefetch + self.assertIn( + 'pg_probackup archive-get failed to ' + 'deliver WAL file: 000000030000000000000006', + postgres_log_content) + + self.assertIn( + 'pg_probackup archive-get failed to ' + 'deliver WAL file: 000000020000000000000006', + postgres_log_content) + + self.assertIn( + 'pg_probackup archive-get used prefetched ' + 'WAL segment 000000010000000000000006, prefetch state: 5/10', + postgres_log_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_archive_get_batching_sanity(self): + """ + Make sure that batching works. 
+ .gz file is corrupted and uncompressed is not, check that both + corruption detected and uncompressed file is used. + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + if self.get_version(node) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + + node.slow_start() + + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + node.pgbench_init(scale=50) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node( + backup_dir, 'node', replica, replica.data_dir) + self.set_replica(node, replica, log_shipping=True) + + if node.major_version >= 12: + self.set_auto_conf(replica, {'restore_command': 'exit 1'}) + else: + replica.append_conf('recovery.conf', "restore_command = 'exit 1'") + + replica.slow_start(replica=True) + + # at this point replica is consistent + restore_command = self.get_restore_command(backup_dir, 'node', replica) + + restore_command += ' -j 2 --batch-size=10' + + print(restore_command) + + if node.major_version >= 12: + self.set_auto_conf(replica, {'restore_command': restore_command}) + else: + replica.append_conf( + 'recovery.conf', "restore_command = '{0}'".format(restore_command)) + + replica.restart() + + sleep(5) + + with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f: + postgres_log_content = f.read() + + self.assertIn( + 'pg_probackup archive-get completed successfully, fetched: 10/10', + postgres_log_content) + self.assertIn('used prefetched WAL segment', postgres_log_content) + self.assertIn('prefetch state: 9/10', postgres_log_content) + self.assertIn('prefetch state: 8/10', postgres_log_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_archive_get_prefetch_corruption(self): + """ + Make sure that WAL corruption is detected. + And --prefetch-dir is honored. 
+ """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off', 'wal_keep_segments': '200'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + + node.slow_start() + + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + node.pgbench_init(scale=50) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node( + backup_dir, 'node', replica, replica.data_dir) + self.set_replica(node, replica, log_shipping=True) + + if node.major_version >= 12: + self.set_auto_conf(replica, {'restore_command': 'exit 1'}) + else: + replica.append_conf('recovery.conf', "restore_command = 'exit 1'") + + replica.slow_start(replica=True) + + # at this point replica is consistent + restore_command = self.get_restore_command(backup_dir, 'node', replica) + + restore_command += ' -j5 --batch-size=10 --log-level-console=VERBOSE' + #restore_command += ' --batch-size=2 --log-level-console=VERBOSE' + + if node.major_version >= 12: + self.set_auto_conf(replica, {'restore_command': restore_command}) + else: + replica.append_conf( + 'recovery.conf', "restore_command = '{0}'".format(restore_command)) + + replica.restart() + + sleep(5) + + with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f: + postgres_log_content = f.read() + + self.assertIn( + 'pg_probackup archive-get completed successfully, fetched: 10/10', + postgres_log_content) + self.assertIn('used prefetched WAL segment', postgres_log_content) + self.assertIn('prefetch state: 9/10', postgres_log_content) + self.assertIn('prefetch state: 8/10', postgres_log_content) + + replica.stop() + + # generate WAL, copy it into prefetch directory, then corrupt + # some segment + node.pgbench_init(scale=20) + sleep(20) + + # now copy WAL files into prefetch directory and corrupt some of them + archive_dir = os.path.join(backup_dir, 'wal', 'node') + files = os.listdir(archive_dir) + files.sort() + + for filename in [files[-4], files[-3], files[-2], files[-1]]: + src_file = os.path.join(archive_dir, filename) + + if node.major_version >= 10: + wal_dir = 'pg_wal' + else: + wal_dir = 'pg_xlog' + + if filename.endswith('.gz'): + dst_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename[:-3]) + with gzip.open(src_file, 'rb') as f_in, open(dst_file, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + else: + dst_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename) + shutil.copyfile(src_file, dst_file) + + print(dst_file) + + # corrupt file + if files[-2].endswith('.gz'): + filename = files[-2][:-3] + else: + filename = files[-2] + + prefetched_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename) + + with open(prefetched_file, "rb+", 0) as f: + f.seek(8192*2) + f.write(b"SURIKEN") + f.flush() + f.close + + # enable restore_command + restore_command = self.get_restore_command(backup_dir, 'node', replica) + restore_command += ' --batch-size=2 --log-level-console=VERBOSE' + + if node.major_version >= 12: + self.set_auto_conf(replica, {'restore_command': restore_command}) + else: + replica.append_conf( + 'recovery.conf', "restore_command = '{0}'".format(restore_command)) + + os.remove(os.path.join(replica.logs_dir, 
'postgresql.log')) + replica.slow_start(replica=True) + + sleep(60) + + with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f: + postgres_log_content = f.read() + + self.assertIn( + 'Prefetched WAL segment {0} is invalid, cannot use it'.format(filename), + postgres_log_content) + + self.assertIn( + 'LOG: restored log file "{0}" from archive'.format(filename), + postgres_log_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_archive_show_partial_files_handling(self): + """ + check that files with '.part', '.part.gz', '.partial' and '.partial.gz' + siffixes are handled correctly + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node, compress=False) + + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + wals_dir = os.path.join(backup_dir, 'wal', 'node') + + # .part file + node.safe_psql( + "postgres", + "create table t1()") + + if self.get_version(node) < 100000: + filename = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_xlogfile_name_offset(pg_current_xlog_location())").rstrip() + else: + filename = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_walfile_name_offset(pg_current_wal_flush_lsn())").rstrip() + + self.switch_wal_segment(node) + + os.rename( + os.path.join(wals_dir, filename), + os.path.join(wals_dir, '{0}.part'.format(filename))) + + # .gz.part file + node.safe_psql( + "postgres", + "create table t2()") + + if self.get_version(node) < 100000: + filename = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_xlogfile_name_offset(pg_current_xlog_location())").rstrip() + else: + filename = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_walfile_name_offset(pg_current_wal_flush_lsn())").rstrip() + + self.switch_wal_segment(node) + + os.rename( + os.path.join(wals_dir, filename), + os.path.join(wals_dir, '{0}.gz.part'.format(filename))) + + # .partial file + node.safe_psql( + "postgres", + "create table t3()") + + if self.get_version(node) < 100000: + filename = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_xlogfile_name_offset(pg_current_xlog_location())").rstrip() + else: + filename = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_walfile_name_offset(pg_current_wal_flush_lsn())").rstrip() + + self.switch_wal_segment(node) + + os.rename( + os.path.join(wals_dir, filename), + os.path.join(wals_dir, '{0}.partial'.format(filename))) + + # .gz.partial file + node.safe_psql( + "postgres", + "create table t4()") + + if self.get_version(node) < 100000: + filename = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_xlogfile_name_offset(pg_current_xlog_location())").rstrip() + else: + filename = node.safe_psql( + "postgres", + "SELECT file_name " + "FROM pg_walfile_name_offset(pg_current_wal_flush_lsn())").rstrip() + + self.switch_wal_segment(node) + + os.rename( + os.path.join(wals_dir, filename), + os.path.join(wals_dir, '{0}.gz.partial'.format(filename))) + + self.show_archive(backup_dir, 'node', options=['--log-level-file=VERBOSE']) + + with open(os.path.join(backup_dir, 'log', 'pg_probackup.log'), 'r') as f: + log_content = f.read() + + self.assertNotIn( + 'WARNING', + 
log_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + +# TODO test with multiple not archived segments. +# TODO corrupted file in archive. + +# important - switchpoint may be NullOffset LSN and not actually existing in archive to boot. +# so write WAL validation code accordingly + +# change wal-seg-size +# +# +#t3 ---------------- +# / +#t2 ---------------- +# / +#t1 -A-------- +# +# + + +#t3 ---------------- +# / +#t2 ---------------- +# / +#t1 -A-------- +# \ No newline at end of file diff --git a/tests/auth_test.py b/tests/auth_test.py index fc21a480d..eca62316b 100644 --- a/tests/auth_test.py +++ b/tests/auth_test.py @@ -24,26 +24,25 @@ class SimpleAuthTest(ProbackupTest, unittest.TestCase): # @unittest.skip("skip") - def test_backup_via_unpriviledged_user(self): + def test_backup_via_unprivileged_user(self): """ - Make node, create unpriviledged user, try to + Make node, create unprivileged user, try to run a backups without EXECUTE rights on certain functions """ fname = self.id().split('.')[3] node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', 'max_wal_senders': '2'} ) backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() node.safe_psql("postgres", "CREATE ROLE backup with LOGIN") @@ -125,8 +124,10 @@ def test_backup_via_unpriviledged_user(self): node.safe_psql( "test1", "create table t1 as select generate_series(0,100)") - node.append_conf("postgresql.auto.conf", "ptrack_enable = 'on'") - node.restart() + if self.ptrack: + self.set_auto_conf(node, {'ptrack_enable': 'on'}) + node.stop() + node.slow_start() try: self.backup_node( @@ -192,10 +193,7 @@ def setUpClass(cls): cls.node = cls.pb.make_simple_node( base_dir="{}/node".format(module_name), set_replication=True, - initdb_params=['--data-checksums', '--auth-host=md5'], - pg_options={ - 'wal_level': 'replica' - } + initdb_params=['--data-checksums', '--auth-host=md5'] ) modify_pg_hba(cls.node) @@ -203,25 +201,26 @@ def setUpClass(cls): cls.pb.add_instance(cls.backup_dir, cls.node.name, cls.node) cls.pb.set_archiving(cls.backup_dir, cls.node.name, cls.node) try: - cls.node.start() + cls.node.slow_start() except StartNodeException: raise unittest.skip("Node hasn't started") - cls.node.safe_psql("postgres", - "CREATE ROLE backup WITH LOGIN PASSWORD 'password'; \ - GRANT USAGE ON SCHEMA pg_catalog TO backup; \ - GRANT EXECUTE ON FUNCTION current_setting(text) TO backup; \ - GRANT EXECUTE ON FUNCTION pg_is_in_recovery() TO backup; \ - GRANT EXECUTE ON FUNCTION pg_start_backup(text, boolean, boolean) TO backup; \ - GRANT EXECUTE ON FUNCTION pg_stop_backup() TO backup; \ - GRANT EXECUTE ON FUNCTION pg_stop_backup(boolean) TO backup; \ - GRANT EXECUTE ON FUNCTION pg_create_restore_point(text) TO backup; \ - GRANT EXECUTE ON FUNCTION pg_switch_xlog() TO backup; \ - GRANT EXECUTE ON FUNCTION txid_current() TO backup; \ - GRANT EXECUTE ON FUNCTION txid_current_snapshot() TO backup; \ - GRANT EXECUTE ON FUNCTION txid_snapshot_xmax(txid_snapshot) TO backup; \ - GRANT EXECUTE ON FUNCTION pg_ptrack_clear() TO backup; \ - GRANT EXECUTE ON FUNCTION pg_ptrack_get_and_clear(oid, oid) TO backup;") + cls.node.safe_psql( + "postgres", + "CREATE ROLE backup WITH LOGIN PASSWORD 
'password'; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT EXECUTE ON FUNCTION current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_start_backup(text, boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_stop_backup() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_stop_backup(boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_create_restore_point(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_switch_xlog() TO backup; " + "GRANT EXECUTE ON FUNCTION txid_current() TO backup; " + "GRANT EXECUTE ON FUNCTION txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION txid_snapshot_xmax(txid_snapshot) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_ptrack_clear() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_ptrack_get_and_clear(oid, oid) TO backup;") cls.pgpass_file = os.path.join(os.path.expanduser('~'), '.pgpass') @classmethod diff --git a/tests/backup.py b/tests/backup.py new file mode 100644 index 000000000..73eb21022 --- /dev/null +++ b/tests/backup.py @@ -0,0 +1,2900 @@ +import unittest +import os +from time import sleep +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +import shutil +from distutils.dir_util import copy_tree +from testgres import ProcessType + + +module_name = 'backup' + + +class BackupTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + # PGPRO-707 + def test_backup_modes_archive(self): + """standart backup modes with ARCHIVE WAL method""" + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + ptrack_enable=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + backup_id = self.backup_node(backup_dir, 'node', node) + show_backup = self.show_pb(backup_dir, 'node')[0] + + self.assertEqual(show_backup['status'], "OK") + self.assertEqual(show_backup['backup-mode'], "FULL") + + # postmaster.pid and postmaster.opts shouldn't be copied + excluded = True + db_dir = os.path.join( + backup_dir, "backups", 'node', backup_id, "database") + + for f in os.listdir(db_dir): + if ( + os.path.isfile(os.path.join(db_dir, f)) and + ( + f == "postmaster.pid" or + f == "postmaster.opts" + ) + ): + excluded = False + self.assertEqual(excluded, True) + + # page backup mode + page_backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="page") + + # print self.show_pb(node) + show_backup = self.show_pb(backup_dir, 'node')[1] + self.assertEqual(show_backup['status'], "OK") + self.assertEqual(show_backup['backup-mode'], "PAGE") + + # Check parent backup + self.assertEqual( + backup_id, + self.show_pb( + backup_dir, 'node', + backup_id=show_backup['id'])["parent-backup-id"]) + + # ptrack backup mode + self.backup_node(backup_dir, 'node', node, backup_type="ptrack") + + show_backup = self.show_pb(backup_dir, 'node')[2] + self.assertEqual(show_backup['status'], "OK") + self.assertEqual(show_backup['backup-mode'], "PTRACK") + + # Check parent backup + self.assertEqual( + page_backup_id, + self.show_pb( + backup_dir, 'node', + 
backup_id=show_backup['id'])["parent-backup-id"]) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_smooth_checkpoint(self): + """full backup with smooth checkpoint""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node( + backup_dir, 'node', node, + options=["-C"]) + self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") + node.stop() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_incremental_backup_without_full(self): + """page-level backup without validated full backup""" + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + ptrack_enable=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + try: + self.backup_node(backup_dir, 'node', node, backup_type="page") + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because page backup should not be possible " + "without valid full backup.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "WARNING: Valid backup on current timeline 1 is not found" in e.message and + "ERROR: Create new full backup before an incremental one" in e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + sleep(1) + + try: + self.backup_node(backup_dir, 'node', node, backup_type="ptrack") + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because page backup should not be possible " + "without valid full backup.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "WARNING: Valid backup on current timeline 1 is not found" in e.message and + "ERROR: Create new full backup before an incremental one" in e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['status'], + "ERROR") + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['status'], + "ERROR") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_incremental_backup_corrupt_full(self): + """page-level backup with corrupted full backup""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = 
self.backup_node(backup_dir, 'node', node) + file = os.path.join( + backup_dir, "backups", "node", backup_id, + "database", "postgresql.conf") + os.remove(file) + + try: + self.validate_pb(backup_dir, 'node') + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of validation of corrupted backup.\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "INFO: Validate backups of the instance 'node'" in e.message and + "WARNING: Backup file".format( + file) in e.message and + "is not found".format(file) in e.message and + "WARNING: Backup {0} data files are corrupted".format( + backup_id) in e.message and + "WARNING: Some backups are not valid" in e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + try: + self.backup_node(backup_dir, 'node', node, backup_type="page") + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because page backup should not be possible " + "without valid full backup.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "WARNING: Valid backup on current timeline 1 is not found" in e.message and + "ERROR: Create new full backup before an incremental one" in e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id)['status'], "CORRUPT") + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['status'], "ERROR") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_ptrack_threads(self): + """ptrack multi thread backup mode""" + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + ptrack_enable=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4"]) + self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") + + self.backup_node( + backup_dir, 'node', node, + backup_type="ptrack", options=["-j", "4"]) + self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_ptrack_threads_stream(self): + """ptrack multi thread backup mode and stream""" + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + ptrack_enable=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node( + backup_dir, 'node', 
node, backup_type="full", + options=["-j", "4", "--stream"]) + + self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") + self.backup_node( + backup_dir, 'node', node, + backup_type="ptrack", options=["-j", "4", "--stream"]) + self.assertEqual(self.show_pb(backup_dir, 'node')[1]['status'], "OK") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_page_detect_corruption(self): + """make node, corrupt some page, check that backup failed""" + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=self.ptrack, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", "--stream"]) + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1000) i") + + node.safe_psql( + "postgres", + "CHECKPOINT") + + heap_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + path = os.path.join(node.data_dir, heap_path) + with open(path, "rb+", 0) as f: + f.seek(9000) + f.write(b"bla") + f.flush() + f.close + + try: + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream", "--log-level-file=VERBOSE"]) + self.assertEqual( + 1, 0, + "Expecting Error because data file is corrupted" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: Corruption detected in file "{0}", ' + 'block 1: page verification failed, calculated checksum'.format(path), + e.message) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['status'], + 'ERROR', + "Backup Status should be ERROR") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + + # @unittest.skip("skip") + def test_backup_detect_corruption(self): + """make node, corrupt some page, check that backup failed""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=self.ptrack, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if self.ptrack and node.major_version > 11: + node.safe_psql( + "postgres", + "create extension ptrack") + + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", "--stream"]) + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + + heap_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", "--stream"]) + + node.safe_psql( + "postgres", + "select count(*) from t_heap") + + node.safe_psql( + "postgres", + "update t_heap set id = id + 10000") + + node.stop() + + heap_fullpath = os.path.join(node.data_dir, heap_path) + + with open(heap_fullpath, "rb+", 
0) as f: + f.seek(9000) + f.write(b"bla") + f.flush() + f.close + + node.slow_start() + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page verification failed, calculated checksum'.format( + heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="delta", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page verification failed, calculated checksum'.format( + heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="page", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page verification failed, calculated checksum'.format( + heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + if self.ptrack: + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="ptrack", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page verification failed, calculated checksum'.format( + heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_backup_detect_invalid_block_header(self): + """make node, corrupt some page, check that backup failed""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=self.ptrack, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if self.ptrack and node.major_version > 11: + node.safe_psql( + "postgres", + "create extension ptrack") + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + + heap_path = node.safe_psql( 
+ "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", "--stream"]) + + node.safe_psql( + "postgres", + "select count(*) from t_heap") + + node.safe_psql( + "postgres", + "update t_heap set id = id + 10000") + + node.stop() + + heap_fullpath = os.path.join(node.data_dir, heap_path) + with open(heap_fullpath, "rb+", 0) as f: + f.seek(8193) + f.write(b"blahblahblahblah") + f.flush() + f.close + + node.slow_start() + +# self.backup_node( +# backup_dir, 'node', node, +# backup_type="full", options=["-j", "4", "--stream"]) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page header invalid, pd_lower'.format(heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="delta", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page header invalid, pd_lower'.format(heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="page", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page header invalid, pd_lower'.format(heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + if self.ptrack: + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="ptrack", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page header invalid, pd_lower'.format(heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_backup_detect_missing_permissions(self): + """make node, corrupt some page, check that backup failed""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=self.ptrack, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + 
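# standard catalog setup: initialize the catalog, register the instance and enable archiving +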
self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if self.ptrack and node.major_version > 11: + node.safe_psql( + "postgres", + "create extension ptrack") + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + + heap_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", "--stream"]) + + node.safe_psql( + "postgres", + "select count(*) from t_heap") + + node.safe_psql( + "postgres", + "update t_heap set id = id + 10000") + + node.stop() + + heap_fullpath = os.path.join(node.data_dir, heap_path) + with open(heap_fullpath, "rb+", 0) as f: + f.seek(8193) + f.write(b"blahblahblahblah") + f.flush() + f.close + + node.slow_start() + +# self.backup_node( +# backup_dir, 'node', node, +# backup_type="full", options=["-j", "4", "--stream"]) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page header invalid, pd_lower'.format(heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="delta", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page header invalid, pd_lower'.format(heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="page", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page header invalid, pd_lower'.format(heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + if self.ptrack: + try: + self.backup_node( + backup_dir, 'node', node, + backup_type="ptrack", options=["-j", "4", "--stream"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of block corruption" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Corruption detected in file "{0}", block 1: ' + 'page header invalid, pd_lower'.format(heap_fullpath), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + 
self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_backup_truncate_misaligned(self): + """ + make node, truncate file to size not even to BLCKSIZE, + take backup + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,100000) i") + + node.safe_psql( + "postgres", + "CHECKPOINT;") + + heap_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + heap_size = node.safe_psql( + "postgres", + "select pg_relation_size('t_heap')") + + with open(os.path.join(node.data_dir, heap_path), "rb+", 0) as f: + f.truncate(int(heap_size) - 4096) + f.flush() + f.close + + output = self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"], return_id=False) + + self.assertIn("WARNING: File", output) + self.assertIn("invalid file size", output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_tablespace_in_pgdata_pgpro_1376(self): + """PGPRO-1376 """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.create_tblspace_in_node( + node, 'tblspace1', + tblspc_path=( + os.path.join( + node.data_dir, 'somedirectory', '100500')) + ) + + self.create_tblspace_in_node( + node, 'tblspace2', + tblspc_path=(os.path.join(node.data_dir)) + ) + + node.safe_psql( + "postgres", + "create table t_heap1 tablespace tblspace1 as select 1 as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1000) i") + + node.safe_psql( + "postgres", + "create table t_heap2 tablespace tblspace2 as select 1 as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1000) i") + + backup_id_1 = self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + node.safe_psql( + "postgres", + "drop table t_heap2") + node.safe_psql( + "postgres", + "drop tablespace tblspace2") + + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + pgdata = self.pgdata_content(node.data_dir) + + relfilenode = node.safe_psql( + "postgres", + "select 't_heap1'::regclass::oid" + ).rstrip() + + list = [] + for root, dirs, files in os.walk(os.path.join( + backup_dir, 'backups', 'node', backup_id_1)): + for file in files: + if file == relfilenode: + path = os.path.join(root, file) + list = list + [path] + + # We expect that relfilenode can be encountered only once + if len(list) > 1: + message = "" + for string in list: + message = message + string + "\n" + self.assertEqual( + 1, 0, + "Following file copied twice by backup:\n {0}".format( + message) + ) + + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]) + + 
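# in paranoia mode, physically compare the restored data directory with the original +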
if self.paranoia: + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_basic_tablespace_handling(self): + """ + make node, take full backup, check that restore with + tablespace mapping will end with error, take page backup, + check that restore with tablespace mapping will end with + success + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + tblspace1_old_path = self.get_tblspace_path(node, 'tblspace1_old') + tblspace2_old_path = self.get_tblspace_path(node, 'tblspace2_old') + + self.create_tblspace_in_node( + node, 'some_lame_tablespace') + + self.create_tblspace_in_node( + node, 'tblspace1', + tblspc_path=tblspace1_old_path) + + self.create_tblspace_in_node( + node, 'tblspace2', + tblspc_path=tblspace2_old_path) + + node.safe_psql( + "postgres", + "create table t_heap_lame tablespace some_lame_tablespace " + "as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1000) i") + + node.safe_psql( + "postgres", + "create table t_heap2 tablespace tblspace2 as select 1 as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1000) i") + + tblspace1_new_path = self.get_tblspace_path(node, 'tblspace1_new') + tblspace2_new_path = self.get_tblspace_path(node, 'tblspace2_new') + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + try: + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "-T", "{0}={1}".format( + tblspace1_old_path, tblspace1_new_path), + "-T", "{0}={1}".format( + tblspace2_old_path, tblspace2_new_path)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because tablespace mapping is incorrect" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: --tablespace-mapping option' in e.message and + 'have an entry in tablespace_map file' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + node.safe_psql( + "postgres", + "drop table t_heap_lame") + + node.safe_psql( + "postgres", + "drop tablespace some_lame_tablespace") + + self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=["-j", "4", "--stream"]) + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "-T", "{0}={1}".format( + tblspace1_old_path, tblspace1_new_path), + "-T", "{0}={1}".format( + tblspace2_old_path, tblspace2_new_path)]) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname, nodes=[node]) + + # @unittest.skip("skip") + def test_tablespace_handling_1(self): + """ + make 
node with tablespace A, take full backup, check that restore with + tablespace mapping of tablespace B will end with error + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + tblspace1_old_path = self.get_tblspace_path(node, 'tblspace1_old') + tblspace2_old_path = self.get_tblspace_path(node, 'tblspace2_old') + + tblspace_new_path = self.get_tblspace_path(node, 'tblspace_new') + + self.create_tblspace_in_node( + node, 'tblspace1', + tblspc_path=tblspace1_old_path) + + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + try: + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "-T", "{0}={1}".format( + tblspace2_old_path, tblspace_new_path)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because tablespace mapping is incorrect" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: --tablespace-mapping option' in e.message and + 'have an entry in tablespace_map file' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_tablespace_handling_2(self): + """ + make node without tablespaces, take full backup, check that restore with + tablespace mapping will end with error + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + tblspace1_old_path = self.get_tblspace_path(node, 'tblspace1_old') + tblspace_new_path = self.get_tblspace_path(node, 'tblspace_new') + + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + try: + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "-T", "{0}={1}".format( + tblspace1_old_path, tblspace_new_path)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because tablespace mapping is incorrect" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: --tablespace-mapping option' in e.message and + 'have an entry in tablespace_map file' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_drop_rel_during_full_backup(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = 
self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + for i in range(1, 512): + node.safe_psql( + "postgres", + "create table t_heap_{0} as select i" + " as id from generate_series(0,100) i".format(i)) + + node.safe_psql( + "postgres", + "VACUUM") + + node.pgbench_init(scale=10) + + relative_path_1 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap_1')").rstrip() + + relative_path_2 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap_1')").rstrip() + + absolute_path_1 = os.path.join(node.data_dir, relative_path_1) + absolute_path_2 = os.path.join(node.data_dir, relative_path_2) + + # FULL backup + gdb = self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--log-level-file=LOG', '--log-level-console=LOG', '--progress'], + gdb=True) + + gdb.set_breakpoint('backup_files') + gdb.run_until_break() + + # REMOVE file + for i in range(1, 512): + node.safe_psql( + "postgres", + "drop table t_heap_{0}".format(i)) + + node.safe_psql( + "postgres", + "CHECKPOINT") + + node.safe_psql( + "postgres", + "CHECKPOINT") + + # File removed, we can proceed with backup + gdb.continue_execution_until_exit() + + pgdata = self.pgdata_content(node.data_dir) + + #with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: + # log_content = f.read() + # self.assertTrue( + # 'LOG: File "{0}" is not found'.format(absolute_path) in log_content, + # 'File "{0}" should be deleted but it`s not'.format(absolute_path)) + + node.cleanup() + self.restore_node(backup_dir, 'node', node) + + # Physical comparison + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_drop_db_during_full_backup(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + for i in range(1, 2): + node.safe_psql( + "postgres", + "create database t_heap_{0}".format(i)) + + node.safe_psql( + "postgres", + "VACUUM") + + # FULL backup + gdb = self.backup_node( + backup_dir, 'node', node, gdb=True, + options=[ + '--stream', '--log-level-file=LOG', + '--log-level-console=LOG', '--progress']) + + gdb.set_breakpoint('backup_files') + gdb.run_until_break() + + # REMOVE file + for i in range(1, 2): + node.safe_psql( + "postgres", + "drop database t_heap_{0}".format(i)) + + node.safe_psql( + "postgres", + "CHECKPOINT") + + node.safe_psql( + "postgres", + "CHECKPOINT") + + # File removed, we can proceed with backup + gdb.continue_execution_until_exit() + + pgdata = self.pgdata_content(node.data_dir) + + #with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: + # log_content = f.read() + # self.assertTrue( + # 'LOG: File "{0}" is not found'.format(absolute_path) in log_content, + # 'File "{0}" should be deleted but it`s not'.format(absolute_path)) + + node.cleanup() + self.restore_node(backup_dir, 'node', node) + + # Physical comparison + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after 
yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_drop_rel_during_backup_delta(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=10) + + node.safe_psql( + "postgres", + "create table t_heap as select i" + " as id from generate_series(0,100) i") + + relative_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + absolute_path = os.path.join(node.data_dir, relative_path) + + # FULL backup + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + # DELTA backup + gdb = self.backup_node( + backup_dir, 'node', node, backup_type='delta', + gdb=True, options=['--log-level-file=LOG']) + + gdb.set_breakpoint('backup_files') + gdb.run_until_break() + + # REMOVE file + node.safe_psql( + "postgres", + "DROP TABLE t_heap") + + node.safe_psql( + "postgres", + "CHECKPOINT") + + # File removed, we can proceed with backup + gdb.continue_execution_until_exit() + + pgdata = self.pgdata_content(node.data_dir) + + with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: + log_content = f.read() + self.assertTrue( + 'LOG: File not found: "{0}"'.format(absolute_path) in log_content, + 'File "{0}" should be deleted but it`s not'.format(absolute_path)) + + node.cleanup() + self.restore_node(backup_dir, 'node', node, options=["-j", "4"]) + + # Physical comparison + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_drop_rel_during_backup_page(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select i" + " as id from generate_series(0,100) i") + + relative_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + absolute_path = os.path.join(node.data_dir, relative_path) + + # FULL backup + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + node.safe_psql( + "postgres", + "insert into t_heap select i" + " as id from generate_series(101,102) i") + + # PAGE backup + gdb = self.backup_node( + backup_dir, 'node', node, backup_type='page', + gdb=True, options=['--log-level-file=LOG']) + + gdb.set_breakpoint('backup_files') + gdb.run_until_break() + + # REMOVE file + os.remove(absolute_path) + + # File removed, we can proceed with backup + gdb.continue_execution_until_exit() + + pgdata = self.pgdata_content(node.data_dir) + + backup_id = self.show_pb(backup_dir, 'node')[1]['id'] + + filelist = self.get_backup_filelist(backup_dir, 'node', backup_id) + self.assertNotIn(relative_path, filelist) + + node.cleanup() + self.restore_node(backup_dir, 'node', node, options=["-j", "4"]) + + # 
Physical comparison + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_drop_rel_during_backup_ptrack(self): + """""" + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=self.ptrack, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + node.safe_psql( + "postgres", + "create table t_heap as select i" + " as id from generate_series(0,100) i") + + relative_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + absolute_path = os.path.join(node.data_dir, relative_path) + + # FULL backup + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + # PTRACK backup + gdb = self.backup_node( + backup_dir, 'node', node, backup_type='ptrack', + gdb=True, options=['--log-level-file=LOG']) + + gdb.set_breakpoint('backup_files') + gdb.run_until_break() + + # REMOVE file + os.remove(absolute_path) + + # File removed, we can proceed with backup + gdb.continue_execution_until_exit() + + pgdata = self.pgdata_content(node.data_dir) + + with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: + log_content = f.read() + self.assertTrue( + 'LOG: File not found: "{0}"'.format(absolute_path) in log_content, + 'File "{0}" should be deleted but it`s not'.format(absolute_path)) + + node.cleanup() + self.restore_node(backup_dir, 'node', node, options=["-j", "4"]) + + # Physical comparison + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_persistent_slot_for_stream_backup(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_size': '40MB'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "SELECT pg_create_physical_replication_slot('slot_1')") + + # FULL backup + self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--slot=slot_1']) + + # FULL backup + self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--slot=slot_1']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_basic_temp_slot_for_stream_backup(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_size': '40MB'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + 
self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--temp-slot']) + + if self.get_version(node) < self.version_to_num('10.0'): + return unittest.skip('You need PostgreSQL >= 10 for this test') + else: + pg_receivexlog_path = self.get_bin_path('pg_receivewal') + + # FULL backup + self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--slot=slot_1', '--temp-slot']) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.skip("skip") + def test_backup_concurrent_drop_table(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=1) + + # FULL backup + gdb = self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--compress'], + gdb=True) + + gdb.set_breakpoint('backup_data_file') + gdb.run_until_break() + + node.safe_psql( + 'postgres', + 'DROP TABLE pgbench_accounts') + + # do checkpoint to guarantee filenode removal + node.safe_psql( + 'postgres', + 'CHECKPOINT') + + gdb.remove_all_breakpoints() + gdb.continue_execution_until_exit() + + show_backup = self.show_pb(backup_dir, 'node')[0] + + self.assertEqual(show_backup['status'], "OK") + + # Clean after yourself + self.del_test_dir(module_name, fname, nodes=[node]) + + # @unittest.skip("skip") + def test_pg_11_adjusted_wal_segment_size(self): + """""" + if self.pg_config_version < self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL >= 11 for this test') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=[ + '--data-checksums', + '--wal-segsize=64'], + pg_options={ + 'min_wal_size': '128MB', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=5) + + # FULL STREAM backup + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + pgbench = node.pgbench(options=['-T', '5', '-c', '2']) + pgbench.wait() + + # PAGE STREAM backup + self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=['--stream']) + + pgbench = node.pgbench(options=['-T', '5', '-c', '2']) + pgbench.wait() + + # DELTA STREAM backup + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--stream']) + + pgbench = node.pgbench(options=['-T', '5', '-c', '2']) + pgbench.wait() + + # FULL ARCHIVE backup + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench(options=['-T', '5', '-c', '2']) + pgbench.wait() + + # PAGE ARCHIVE backup + self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-T', '5', '-c', '2']) + pgbench.wait() + + # DELTA ARCHIVE backup + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='delta') + pgdata = self.pgdata_content(node.data_dir) + + # delete + output = self.delete_pb( + backup_dir, 'node', + options=[ + '--expired', + '--delete-wal', + 
'--retention-redundancy=1']) + + # validate + self.validate_pb(backup_dir) + + # merge + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # restore + node.cleanup() + self.restore_node( + backup_dir, 'node', node, backup_id=backup_id) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_sigint_handling(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + gdb = self.backup_node( + backup_dir, 'node', node, gdb=True, + options=['--stream', '--log-level-file=LOG']) + + gdb.set_breakpoint('backup_non_data_file') + gdb.run_until_break() + + gdb.continue_execution_until_break(20) + gdb.remove_all_breakpoints() + + gdb._execute('signal SIGINT') + gdb.continue_execution_until_error() + + backup_id = self.show_pb(backup_dir, 'node')[0]['id'] + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node', backup_id)['status'], + 'Backup STATUS should be "ERROR"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_sigterm_handling(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + gdb = self.backup_node( + backup_dir, 'node', node, gdb=True, + options=['--stream', '--log-level-file=LOG']) + + gdb.set_breakpoint('backup_non_data_file') + gdb.run_until_break() + + gdb.continue_execution_until_break(20) + gdb.remove_all_breakpoints() + + gdb._execute('signal SIGTERM') + gdb.continue_execution_until_error() + + backup_id = self.show_pb(backup_dir, 'node')[0]['id'] + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node', backup_id)['status'], + 'Backup STATUS should be "ERROR"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_sigquit_handling(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + gdb = self.backup_node( + backup_dir, 'node', node, gdb=True, options=['--stream']) + + gdb.set_breakpoint('backup_non_data_file') + gdb.run_until_break() + + gdb.continue_execution_until_break(20) + gdb.remove_all_breakpoints() + + gdb._execute('signal SIGQUIT') + gdb.continue_execution_until_error() + + backup_id = self.show_pb(backup_dir, 'node')[0]['id'] + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node', backup_id)['status'], + 'Backup STATUS should be "ERROR"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_drop_table(self): + """""" + fname = 
self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + connect_1 = node.connect("postgres") + connect_1.execute( + "create table t_heap as select i" + " as id from generate_series(0,100) i") + connect_1.commit() + + connect_2 = node.connect("postgres") + connect_2.execute("SELECT * FROM t_heap") + connect_2.commit() + + # DROP table + connect_2.execute("DROP TABLE t_heap") + connect_2.commit() + + # FULL backup + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_basic_missing_file_permissions(self): + """""" + if os.name == 'nt': + return unittest.skip('Skipped because it is POSIX only test') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + relative_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('pg_class')").rstrip() + + full_path = os.path.join(node.data_dir, relative_path) + + os.chmod(full_path, 000) + + try: + # FULL backup + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of missing permissions" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Cannot open file', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + os.chmod(full_path, 700) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.skip("skip") + def test_basic_missing_dir_permissions(self): + """""" + if os.name == 'nt': + return unittest.skip('Skipped because it is POSIX only test') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + full_path = os.path.join(node.data_dir, 'pg_twophase') + + os.chmod(full_path, 000) + + try: + # FULL backup + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of missing permissions" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Cannot open directory', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + os.chmod(full_path, 700) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.skip("skip") + def test_backup_with_least_privileges_role(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 
'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=self.ptrack, + initdb_params=['--data-checksums'], + pg_options={'archive_timeout': '30s'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + 'postgres', + 'CREATE DATABASE backupdb') + + if self.ptrack and node.major_version >= 12: + node.safe_psql( + "backupdb", + "CREATE EXTENSION ptrack WITH SCHEMA pg_catalog") + + # PG 9.5 + if self.get_version(node) < 90600: + node.safe_psql( + 'backupdb', + "REVOKE ALL ON DATABASE backupdb from PUBLIC; " + "REVOKE ALL ON SCHEMA public from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON SCHEMA information_schema from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; " + "CREATE ROLE backup WITH LOGIN REPLICATION; " + "GRANT CONNECT ON DATABASE backupdb to backup; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; " # for partial restore, checkdb and ptrack + "GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.textout(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.timestamptz(timestamp with time zone, integer) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;" + ) + # PG 9.6 + elif self.get_version(node) > 90600 and self.get_version(node) < 100000: + node.safe_psql( + 'backupdb', + "REVOKE ALL ON DATABASE backupdb from PUBLIC; " + "REVOKE ALL ON SCHEMA public from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON SCHEMA information_schema from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; " + "CREATE ROLE backup WITH LOGIN REPLICATION; " + "GRANT CONNECT ON DATABASE backupdb to 
backup; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; " # for partial restore, checkdb and ptrack + "GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.textout(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.timestamptz(timestamp with time zone, integer) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_system() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_xlog() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_xlog_replay_location() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;" + ) + # >= 10 + else: + node.safe_psql( + 'backupdb', + "REVOKE ALL ON DATABASE backupdb from PUBLIC; " + "REVOKE ALL ON SCHEMA public from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON SCHEMA information_schema from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; " + "CREATE ROLE backup WITH LOGIN REPLICATION; " + "GRANT CONNECT ON DATABASE backupdb to backup; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; " # for partial restore, checkdb and ptrack + "GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_system() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_wal() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_wal_replay_lsn() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;" + ) + + if self.ptrack: + if node.major_version < 12: + for fname in [ + 
'pg_catalog.oideq(oid, oid)', + 'pg_catalog.ptrack_version()', + 'pg_catalog.pg_ptrack_clear()', + 'pg_catalog.pg_ptrack_control_lsn()', + 'pg_catalog.pg_ptrack_get_and_clear_db(oid, oid)', + 'pg_catalog.pg_ptrack_get_and_clear(oid, oid)', + 'pg_catalog.pg_ptrack_get_block_2(oid, oid, oid, bigint)', + 'pg_catalog.pg_stop_backup()']: + + node.safe_psql( + "backupdb", + "GRANT EXECUTE ON FUNCTION {0} " + "TO backup".format(fname)) + else: + fnames = [ + 'pg_catalog.ptrack_get_pagemapset(pg_lsn)', + 'pg_catalog.ptrack_init_lsn()' + ] + + for fname in fnames: + node.safe_psql( + "backupdb", + "GRANT EXECUTE ON FUNCTION {0} " + "TO backup".format(fname)) + + if ProbackupTest.enterprise: + node.safe_psql( + "backupdb", + "GRANT EXECUTE ON FUNCTION pg_catalog.pgpro_edition() TO backup") + + node.safe_psql( + "backupdb", + "GRANT EXECUTE ON FUNCTION pg_catalog.pgpro_version() TO backup") + + # FULL backup + self.backup_node( + backup_dir, 'node', node, + datname='backupdb', options=['--stream', '-U', 'backup']) + self.backup_node( + backup_dir, 'node', node, + datname='backupdb', options=['-U', 'backup']) + + # PAGE + self.backup_node( + backup_dir, 'node', node, backup_type='page', + datname='backupdb', options=['-U', 'backup']) + self.backup_node( + backup_dir, 'node', node, backup_type='page', datname='backupdb', + options=['--stream', '-U', 'backup']) + + # DELTA + self.backup_node( + backup_dir, 'node', node, backup_type='delta', + datname='backupdb', options=['-U', 'backup']) + self.backup_node( + backup_dir, 'node', node, backup_type='delta', + datname='backupdb', options=['--stream', '-U', 'backup']) + + # PTRACK + if self.ptrack: + self.backup_node( + backup_dir, 'node', node, backup_type='ptrack', + datname='backupdb', options=['-U', 'backup']) + self.backup_node( + backup_dir, 'node', node, backup_type='ptrack', + datname='backupdb', options=['--stream', '-U', 'backup']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_parent_choosing(self): + """ + PAGE3 <- RUNNING(parent should be FULL) + PAGE2 <- OK + PAGE1 <- CORRUPT + FULL + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + full_id = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + page1_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE2 + page2_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGE1 to ERROR + self.change_backup_status(backup_dir, 'node', page1_id, 'ERROR') + + # PAGE3 + page3_id = self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=['--log-level-file=LOG']) + + log_file_path = os.path.join(backup_dir, 'log', 'pg_probackup.log') + with open(log_file_path) as f: + log_file_content = f.read() + + self.assertIn( + "WARNING: Backup {0} has invalid parent: {1}. " + "Cannot be a parent".format(page2_id, page1_id), + log_file_content) + + self.assertIn( + "WARNING: Backup {0} has status: ERROR. 
" + "Cannot be a parent".format(page1_id), + log_file_content) + + self.assertIn( + "Parent backup: {0}".format(full_id), + log_file_content) + + self.assertEqual( + self.show_pb( + backup_dir, 'node', backup_id=page3_id)['parent-backup-id'], + full_id) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_parent_choosing_1(self): + """ + PAGE3 <- RUNNING(parent should be FULL) + PAGE2 <- OK + PAGE1 <- (missing) + FULL + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + full_id = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + page1_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE2 + page2_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Delete PAGE1 + shutil.rmtree( + os.path.join(backup_dir, 'backups', 'node', page1_id)) + + # PAGE3 + page3_id = self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=['--log-level-file=LOG']) + + log_file_path = os.path.join(backup_dir, 'log', 'pg_probackup.log') + with open(log_file_path) as f: + log_file_content = f.read() + + self.assertIn( + "WARNING: Backup {0} has missing parent: {1}. " + "Cannot be a parent".format(page2_id, page1_id), + log_file_content) + + self.assertIn( + "Parent backup: {0}".format(full_id), + log_file_content) + + self.assertEqual( + self.show_pb( + backup_dir, 'node', backup_id=page3_id)['parent-backup-id'], + full_id) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_parent_choosing_2(self): + """ + PAGE3 <- RUNNING(backup should fail) + PAGE2 <- OK + PAGE1 <- OK + FULL <- (missing) + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + full_id = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + page1_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE2 + page2_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Delete FULL + shutil.rmtree( + os.path.join(backup_dir, 'backups', 'node', full_id)) + + # PAGE3 + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=['--log-level-file=LOG']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because FULL backup is missing" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'WARNING: Valid backup on current timeline 1 is not found' in e.message and + 'ERROR: Create new full backup before an incremental one' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + self.show_pb( + backup_dir, 'node')[2]['status'], + 'ERROR') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + 
@unittest.skip("skip") + def test_backup_with_less_privileges_role(self): + """ + check permissions correctness from documentation: + https://github.com/postgrespro/pg_probackup/blob/master/Documentation.md#configuring-the-database-cluster + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=self.ptrack, + initdb_params=['--data-checksums'], + pg_options={ + 'archive_timeout': '30s', + 'archive_mode': 'always', + 'checkpoint_timeout': '60s', + 'wal_level': 'logical'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_config(backup_dir, 'node', options=['--archive-timeout=60s']) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + 'postgres', + 'CREATE DATABASE backupdb') + + if self.ptrack and node.major_version >= 12: + node.safe_psql( + 'backupdb', + 'CREATE EXTENSION ptrack') + + # PG 9.5 + if self.get_version(node) < 90600: + node.safe_psql( + 'backupdb', + "BEGIN; " + "CREATE ROLE backup WITH LOGIN; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_xlog() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup; " + "COMMIT;" + ) + # PG 9.6 + elif self.get_version(node) > 90600 and self.get_version(node) < 100000: + node.safe_psql( + 'backupdb', + "BEGIN; " + "CREATE ROLE backup WITH LOGIN; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_xlog() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_xlog_replay_location() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup; " + "COMMIT;" + ) + # >= 10 + else: + node.safe_psql( + 'backupdb', + "BEGIN; " + "CREATE ROLE backup WITH LOGIN; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; " + "GRANT EXECUTE ON FUNCTION 
pg_catalog.pg_switch_wal() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_wal_replay_lsn() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup; " + "COMMIT;" + ) + + # enable STREAM backup + node.safe_psql( + 'backupdb', + 'ALTER ROLE backup WITH REPLICATION;') + + # FULL backup + self.backup_node( + backup_dir, 'node', node, + datname='backupdb', options=['--stream', '-U', 'backup']) + self.backup_node( + backup_dir, 'node', node, + datname='backupdb', options=['-U', 'backup']) + + # PAGE + self.backup_node( + backup_dir, 'node', node, backup_type='page', + datname='backupdb', options=['-U', 'backup']) + self.backup_node( + backup_dir, 'node', node, backup_type='page', datname='backupdb', + options=['--stream', '-U', 'backup']) + + # DELTA + self.backup_node( + backup_dir, 'node', node, backup_type='delta', + datname='backupdb', options=['-U', 'backup']) + self.backup_node( + backup_dir, 'node', node, backup_type='delta', + datname='backupdb', options=['--stream', '-U', 'backup']) + + # PTRACK + if self.ptrack: + self.backup_node( + backup_dir, 'node', node, backup_type='ptrack', + datname='backupdb', options=['-U', 'backup']) + self.backup_node( + backup_dir, 'node', node, backup_type='ptrack', + datname='backupdb', options=['--stream', '-U', 'backup']) + + if self.get_version(node) < 90600: + self.del_test_dir(module_name, fname) + return + + # Restore as replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'node', replica) + self.set_replica(node, replica) + self.add_instance(backup_dir, 'replica', replica) + self.set_config( + backup_dir, 'replica', + options=['--archive-timeout=120s', '--log-level-console=LOG']) + self.set_archiving(backup_dir, 'replica', replica, replica=True) + self.set_auto_conf(replica, {'hot_standby': 'on'}) + + # freeze bgwriter to get rid of RUNNING XACTS records + # bgwriter_pid = node.auxiliary_pids[ProcessType.BackgroundWriter][0] + # gdb_checkpointer = self.gdb_attach(bgwriter_pid) + + copy_tree( + os.path.join(backup_dir, 'wal', 'node'), + os.path.join(backup_dir, 'wal', 'replica')) + + replica.slow_start(replica=True) + + # self.switch_wal_segment(node) + # self.switch_wal_segment(node) + + self.backup_node( + backup_dir, 'replica', replica, + datname='backupdb', options=['-U', 'backup']) + + # stream full backup from replica + self.backup_node( + backup_dir, 'replica', replica, + datname='backupdb', options=['--stream', '-U', 'backup']) + +# self.switch_wal_segment(node) + + # PAGE backup from replica + self.switch_wal_segment(node) + self.backup_node( + backup_dir, 'replica', replica, backup_type='page', + datname='backupdb', options=['-U', 'backup', '--archive-timeout=30s']) + + self.backup_node( + backup_dir, 'replica', replica, backup_type='page', + datname='backupdb', options=['--stream', '-U', 'backup']) + + # DELTA backup from replica + self.switch_wal_segment(node) + self.backup_node( + backup_dir, 'replica', replica, backup_type='delta', + datname='backupdb', options=['-U', 'backup']) + self.backup_node( + backup_dir, 'replica', replica, backup_type='delta', + datname='backupdb', options=['--stream', '-U', 'backup']) + + # PTRACK backup from replica + if self.ptrack: + self.switch_wal_segment(node) + self.backup_node( + backup_dir, 'replica', 
replica, backup_type='ptrack', + datname='backupdb', options=['-U', 'backup']) + self.backup_node( + backup_dir, 'replica', replica, backup_type='ptrack', + datname='backupdb', options=['--stream', '-U', 'backup']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_issue_132(self): + """ + https://github.com/postgrespro/pg_probackup/issues/132 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + with node.connect("postgres") as conn: + for i in range(50000): + conn.execute( + "CREATE TABLE t_{0} as select 1".format(i)) + conn.commit() + + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + node.cleanup() + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + exit(1) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_issue_132_1(self): + """ + https://github.com/postgrespro/pg_probackup/issues/132 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + # TODO: check version of old binary, it should be 2.1.4, 2.1.5 or 2.2.1 + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + with node.connect("postgres") as conn: + for i in range(30000): + conn.execute( + "CREATE TABLE t_{0} as select 1".format(i)) + conn.commit() + + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream'], old_binary=True) + + delta_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', + options=['--stream'], old_binary=True) + + node.cleanup() + + # make sure that new binary can detect corruption + try: + self.validate_pb(backup_dir, 'node', backup_id=full_id) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because FULL backup is CORRUPT" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Backup {0} is a victim of metadata corruption'.format(full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + try: + self.validate_pb(backup_dir, 'node', backup_id=delta_id) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because FULL backup is CORRUPT" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Backup {0} is a victim of metadata corruption'.format(full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'CORRUPT', self.show_pb(backup_dir, 'node', full_id)['status'], + 'Backup STATUS should be "CORRUPT"') + + self.assertEqual( 
+ 'ORPHAN', self.show_pb(backup_dir, 'node', delta_id)['status'], + 'Backup STATUS should be "ORPHAN"') + + # check that revalidation is working correctly + try: + self.restore_node( + backup_dir, 'node', node, backup_id=delta_id) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because FULL backup is CORRUPT" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Backup {0} is a victim of metadata corruption'.format(full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'CORRUPT', self.show_pb(backup_dir, 'node', full_id)['status'], + 'Backup STATUS should be "CORRUPT"') + + self.assertEqual( + 'ORPHAN', self.show_pb(backup_dir, 'node', delta_id)['status'], + 'Backup STATUS should be "ORPHAN"') + + # check that '--no-validate' do not allow to restore ORPHAN backup +# try: +# self.restore_node( +# backup_dir, 'node', node, backup_id=delta_id, +# options=['--no-validate']) +# # we should die here because exception is what we expect to happen +# self.assertEqual( +# 1, 0, +# "Expecting Error because FULL backup is CORRUPT" +# "\n Output: {0} \n CMD: {1}".format( +# repr(self.output), self.cmd)) +# except ProbackupException as e: +# self.assertIn( +# 'Insert data', +# e.message, +# '\n Unexpected Error Message: {0}\n CMD: {1}'.format( +# repr(e.message), self.cmd)) + + node.cleanup() + + output = self.restore_node( + backup_dir, 'node', node, backup_id=full_id, options=['--force']) + + self.assertIn( + 'WARNING: Backup {0} has status: CORRUPT'.format(full_id), + output) + + self.assertIn( + 'WARNING: Backup {0} is corrupt.'.format(full_id), + output) + + self.assertIn( + 'WARNING: Backup {0} is not valid, restore is forced'.format(full_id), + output) + + self.assertIn( + 'INFO: Restore of backup {0} completed.'.format(full_id), + output) + + node.cleanup() + + output = self.restore_node( + backup_dir, 'node', node, backup_id=delta_id, options=['--force']) + + self.assertIn( + 'WARNING: Backup {0} is orphan.'.format(delta_id), + output) + + self.assertIn( + 'WARNING: Backup {0} is not valid, restore is forced'.format(full_id), + output) + + self.assertIn( + 'WARNING: Backup {0} is not valid, restore is forced'.format(delta_id), + output) + + self.assertIn( + 'INFO: Restore of backup {0} completed.'.format(delta_id), + output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_note_sanity(self): + """ + test that adding note to backup works as expected + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--log-level-file=LOG', '--note=test_note']) + + show_backups = self.show_pb(backup_dir, 'node') + + print(self.show_pb(backup_dir, as_text=True, as_json=True)) + + self.assertEqual(show_backups[0]['note'], "test_note") + + self.set_backup(backup_dir, 'node', backup_id, options=['--note=none']) + + backup_meta = self.show_pb(backup_dir, 'node', backup_id) + + self.assertNotIn( + 'note', + backup_meta) + + 
# Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_parent_backup_made_by_newer_version(self): + """incremental backup with parent made by newer version""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + + control_file = os.path.join( + backup_dir, "backups", "node", backup_id, + "backup.control") + + version = self.probackup_version + fake_new_version = str(int(version.split('.')[0]) + 1) + '.0.0' + + with open(control_file, 'r') as f: + data = f.read(); + + data = data.replace(version, fake_new_version) + + with open(control_file, 'w') as f: + f.write(data); + + try: + self.backup_node(backup_dir, 'node', node, backup_type="page") + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because incremental backup should not be possible " + "if parent made by newer version.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "pg_probackup do not guarantee to be forward compatible. " + "Please upgrade pg_probackup binary.", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['status'], "ERROR") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_issue_203(self): + """ + https://github.com/postgrespro/pg_probackup/issues/203 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + with node.connect("postgres") as conn: + for i in range(1000000): + conn.execute( + "CREATE TABLE t_{0} as select 1".format(i)) + conn.commit() + + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream', '-j2']) + + pgdata = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node(backup_dir, 'node', + node_restored, data_dir=node_restored.data_dir) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/backup_test.py b/tests/backup_test.py deleted file mode 100644 index 1fa74643a..000000000 --- a/tests/backup_test.py +++ /dev/null @@ -1,522 +0,0 @@ -import unittest -import os -from time import sleep -from .helpers.ptrack_helpers import ProbackupTest, ProbackupException -from .helpers.cfs_helpers import find_by_name - - -module_name = 'backup' - - -class BackupTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - # PGPRO-707 - def test_backup_modes_archive(self): - """standart backup modes with ARCHIVE WAL method""" 
- fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'ptrack_enable': 'on'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node(backup_dir, 'node', node) - show_backup = self.show_pb(backup_dir, 'node')[0] - - self.assertEqual(show_backup['status'], "OK") - self.assertEqual(show_backup['backup-mode'], "FULL") - - # postmaster.pid and postmaster.opts shouldn't be copied - excluded = True - db_dir = os.path.join( - backup_dir, "backups", 'node', backup_id, "database") - - for f in os.listdir(db_dir): - if ( - os.path.isfile(os.path.join(db_dir, f)) and - ( - f == "postmaster.pid" or - f == "postmaster.opts" - ) - ): - excluded = False - self.assertEqual(excluded, True) - - # page backup mode - page_backup_id = self.backup_node( - backup_dir, 'node', node, backup_type="page") - - # print self.show_pb(node) - show_backup = self.show_pb(backup_dir, 'node')[1] - self.assertEqual(show_backup['status'], "OK") - self.assertEqual(show_backup['backup-mode'], "PAGE") - - # Check parent backup - self.assertEqual( - backup_id, - self.show_pb( - backup_dir, 'node', - backup_id=show_backup['id'])["parent-backup-id"]) - - # ptrack backup mode - self.backup_node(backup_dir, 'node', node, backup_type="ptrack") - - show_backup = self.show_pb(backup_dir, 'node')[2] - self.assertEqual(show_backup['status'], "OK") - self.assertEqual(show_backup['backup-mode'], "PTRACK") - - # Check parent backup - self.assertEqual( - page_backup_id, - self.show_pb( - backup_dir, 'node', - backup_id=show_backup['id'])["parent-backup-id"]) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_smooth_checkpoint(self): - """full backup with smooth checkpoint""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - self.backup_node( - backup_dir, 'node', node, - options=["-C"]) - self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") - node.stop() - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_incremental_backup_without_full(self): - """page-level backup without validated full backup""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'ptrack_enable': 'on'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - try: - self.backup_node(backup_dir, 'node', node, backup_type="page") - # we should die here because exception is what we expect to happen - self.assertEqual( - 1, 0, - "Expecting Error because page backup should not be possible " - "without valid full backup.\n Output: {0} \n CMD: {1}".format( - 
repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertIn( - "ERROR: Valid backup on current timeline is not found. " - "Create new FULL backup before an incremental one.", - e.message, - "\n Unexpected Error Message: {0}\n CMD: {1}".format( - repr(e.message), self.cmd)) - - sleep(1) - - try: - self.backup_node(backup_dir, 'node', node, backup_type="ptrack") - # we should die here because exception is what we expect to happen - self.assertEqual( - 1, 0, - "Expecting Error because page backup should not be possible " - "without valid full backup.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertIn( - "ERROR: Valid backup on current timeline is not found. " - "Create new FULL backup before an incremental one.", - e.message, - "\n Unexpected Error Message: {0}\n CMD: {1}".format( - repr(e.message), self.cmd)) - - self.assertEqual( - self.show_pb(backup_dir, 'node')[0]['status'], - "ERROR") - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_incremental_backup_corrupt_full(self): - """page-level backup with corrupted full backup""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'ptrack_enable': 'on'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node(backup_dir, 'node', node) - file = os.path.join( - backup_dir, "backups", "node", backup_id, - "database", "postgresql.conf") - os.remove(file) - - try: - self.validate_pb(backup_dir, 'node') - # we should die here because exception is what we expect to happen - self.assertEqual( - 1, 0, - "Expecting Error because of validation of corrupted backup.\n" - " Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - "INFO: Validate backups of the instance 'node'\n" in e.message and - "WARNING: Backup file \"{0}\" is not found\n".format( - file) in e.message and - "WARNING: Backup {0} data files are corrupted\n".format( - backup_id) in e.message and - "WARNING: Some backups are not valid\n" in e.message, - "\n Unexpected Error Message: {0}\n CMD: {1}".format( - repr(e.message), self.cmd)) - - try: - self.backup_node(backup_dir, 'node', node, backup_type="page") - # we should die here because exception is what we expect to happen - self.assertEqual( - 1, 0, - "Expecting Error because page backup should not be possible " - "without valid full backup.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertIn( - "ERROR: Valid backup on current timeline is not found. 
" - "Create new FULL backup before an incremental one.", - e.message, - "\n Unexpected Error Message: {0}\n CMD: {1}".format( - repr(e.message), self.cmd)) - - self.assertEqual( - self.show_pb(backup_dir, 'node', backup_id)['status'], "CORRUPT") - self.assertEqual( - self.show_pb(backup_dir, 'node')[1]['status'], "ERROR") - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_ptrack_threads(self): - """ptrack multi thread backup mode""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'ptrack_enable': 'on'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - self.backup_node( - backup_dir, 'node', node, - backup_type="full", options=["-j", "4"]) - self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") - - self.backup_node( - backup_dir, 'node', node, - backup_type="ptrack", options=["-j", "4"]) - self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_ptrack_threads_stream(self): - """ptrack multi thread backup mode and stream""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'ptrack_enable': 'on', - 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.backup_node( - backup_dir, 'node', node, backup_type="full", - options=["-j", "4", "--stream"]) - - self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") - self.backup_node( - backup_dir, 'node', node, - backup_type="ptrack", options=["-j", "4", "--stream"]) - self.assertEqual(self.show_pb(backup_dir, 'node')[1]['status'], "OK") - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_page_corruption_heal_via_ptrack_1(self): - """make node, corrupt some page, check that backup failed""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.backup_node( - backup_dir, 'node', node, - backup_type="full", options=["-j", "4", "--stream"]) - - node.safe_psql( - "postgres", - "create table t_heap as select 1 as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1000) i") - node.safe_psql( - "postgres", - "CHECKPOINT;") - - heap_path = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() - - with open(os.path.join(node.data_dir, heap_path), "rb+", 0) as f: - f.seek(9000) - f.write(b"bla") - f.flush() - f.close - - self.backup_node( - backup_dir, 'node', node, backup_type="full", - options=["-j", "4", "--stream", 
'--log-level-file=verbose']) - - # open log file and check - with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: - log_content = f.read() - self.assertIn('block 1, try to fetch via SQL', log_content) - self.assertIn('SELECT pg_catalog.pg_ptrack_get_block', log_content) - f.close - - self.assertTrue( - self.show_pb(backup_dir, 'node')[1]['status'] == 'OK', - "Backup Status should be OK") - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_page_corruption_heal_via_ptrack_2(self): - """make node, corrupt some page, check that backup failed""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.backup_node( - backup_dir, 'node', node, backup_type="full", - options=["-j", "4", "--stream"]) - - node.safe_psql( - "postgres", - "create table t_heap as select 1 as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1000) i") - node.safe_psql( - "postgres", - "CHECKPOINT;") - - heap_path = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() - node.stop() - - with open(os.path.join(node.data_dir, heap_path), "rb+", 0) as f: - f.seek(9000) - f.write(b"bla") - f.flush() - f.close - node.start() - - try: - self.backup_node( - backup_dir, 'node', node, - backup_type="full", options=["-j", "4", "--stream"]) - # we should die here because exception is what we expect to happen - self.assertEqual( - 1, 0, - "Expecting Error because of page " - "corruption in PostgreSQL instance.\n" - " Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - "WARNING: File" in e.message and - "blknum" in e.message and - "have wrong checksum" in e.message and - "try to fetch via SQL" in e.message and - "WARNING: page verification failed, " - "calculated checksum" in e.message and - "ERROR: query failed: " - "ERROR: invalid page in block" in e.message and - "query was: SELECT pg_catalog.pg_ptrack_get_block_2" in e.message, - "\n Unexpected Error Message: {0}\n CMD: {1}".format( - repr(e.message), self.cmd)) - - self.assertTrue( - self.show_pb(backup_dir, 'node')[1]['status'] == 'ERROR', - "Backup Status should be ERROR") - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_tablespace_in_pgdata_pgpro_1376(self): - """PGPRO-1376 """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node( - node, 'tblspace1', - tblspc_path=( - os.path.join( - node.data_dir, 'somedirectory', '100500')) - ) - - self.create_tblspace_in_node( - node, 'tblspace2', - tblspc_path=(os.path.join(node.data_dir)) - ) - - node.safe_psql( - "postgres", - "create table t_heap1 tablespace tblspace1 as select 1 as id, " - "md5(i::text) as text, " - 
"md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1000) i") - - node.safe_psql( - "postgres", - "create table t_heap2 tablespace tblspace2 as select 1 as id, " - "md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1000) i") - - try: - self.backup_node( - backup_dir, 'node', node, backup_type="full", - options=["-j", "4", "--stream"]) - # we should die here because exception is what we expect to happen - self.assertEqual( - 1, 0, - "Expecting Error because of too many levels " - "of symbolic linking\n" - " Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - 'Too many levels of symbolic links' in e.message, - "\n Unexpected Error Message: {0}\n CMD: {1}".format( - repr(e.message), self.cmd)) - - node.safe_psql( - "postgres", - "drop table t_heap2") - node.safe_psql( - "postgres", - "drop tablespace tblspace2") - - self.backup_node( - backup_dir, 'node', node, backup_type="full", - options=["-j", "4", "--stream"]) - - pgdata = self.pgdata_content(node.data_dir) - - relfilenode = node.safe_psql( - "postgres", - "select 't_heap1'::regclass::oid" - ).rstrip() - - list = [] - for root, dirs, files in os.walk(backup_dir): - for file in files: - if file == relfilenode: - path = os.path.join(root, file) - list = list + [path] - - # We expect that relfilenode occures only once - if len(list) > 1: - message = "" - for string in list: - message = message + string + "\n" - self.assertEqual( - 1, 0, - "Following file copied twice by backup:\n {0}".format( - message) - ) - - node.cleanup() - - self.restore_node( - backup_dir, 'node', node, options=["-j", "4"]) - - if self.paranoia: - pgdata_restored = self.pgdata_content(node.data_dir) - self.compare_pgdata(pgdata, pgdata_restored) diff --git a/tests/cfs_backup.py b/tests/cfs_backup.py index 412320327..5a3665518 100644 --- a/tests/cfs_backup.py +++ b/tests/cfs_backup.py @@ -22,7 +22,6 @@ def setUp(self): set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', 'ptrack_enable': 'on', 'cfs_encryption': 'off', 'max_wal_senders': '2', @@ -34,7 +33,7 @@ def setUp(self): self.add_instance(self.backup_dir, 'node', self.node) self.set_archiving(self.backup_dir, 'node', self.node) - self.node.start() + self.node.slow_start() self.create_tblspace_in_node(self.node, tblspace_name, cfs=True) @@ -745,7 +744,7 @@ def test_multiple_segments(self): self.restore_node( self.backup_dir, 'node', self.node, backup_id=backup_id_full, options=["-j", "4"]) - self.node.start() + self.node.slow_start() self.assertEqual( full_result, self.node.safe_psql("postgres", "SELECT * FROM t_heap"), @@ -760,7 +759,7 @@ def test_multiple_segments(self): self.restore_node( self.backup_dir, 'node', self.node, backup_id=backup_id_page, options=["-j", "4"]) - self.node.start() + self.node.slow_start() self.assertEqual( page_result, self.node.safe_psql("postgres", "SELECT * FROM t_heap"), @@ -879,7 +878,7 @@ def test_multiple_segments_in_multiple_tablespaces(self): self.restore_node( self.backup_dir, 'node', self.node, backup_id=backup_id_full, options=["-j", "4"]) - self.node.start() + self.node.slow_start() self.assertEqual( full_result_1, self.node.safe_psql("postgres", "SELECT * FROM t_heap_1"), @@ -905,7 +904,7 @@ def test_multiple_segments_in_multiple_tablespaces(self): self.restore_node( self.backup_dir, 'node', self.node, backup_id=backup_id_page, options=["-j", "4"]) - self.node.start() + 
self.node.slow_start() self.assertEqual( page_result_1, self.node.safe_psql("postgres", "SELECT * FROM t_heap_1"), diff --git a/tests/cfs_restore.py b/tests/cfs_restore.py index 73553a305..07cf891aa 100644 --- a/tests/cfs_restore.py +++ b/tests/cfs_restore.py @@ -32,10 +32,8 @@ def setUp(self): set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', # 'ptrack_enable': 'on', 'cfs_encryption': 'off', - 'max_wal_senders': '2' } ) @@ -43,7 +41,7 @@ def setUp(self): self.add_instance(self.backup_dir, 'node', self.node) self.set_archiving(self.backup_dir, 'node', self.node) - self.node.start() + self.node.slow_start() self.create_tblspace_in_node(self.node, tblspace_name, cfs=True) self.add_data_in_cluster() @@ -93,7 +91,7 @@ def test_restore_empty_tablespace_from_fullbackup(self): ) try: - self.node.start() + self.node.slow_start() except ProbackupException as e: self.fail( "ERROR: Instance not started after restore. \n {0} \n {1}".format( @@ -151,7 +149,7 @@ def test_restore_from_fullbackup_to_old_location(self): "ERROR: File pg_compression not found in tablespace dir" ) try: - self.node.start() + self.node.slow_start() except ProbackupException as e: self.fail( "ERROR: Instance not started after restore. \n {0} \n {1}".format( @@ -189,7 +187,7 @@ def test_restore_from_fullbackup_to_old_location_3_jobs(self): "ERROR: File pg_compression not found in backup dir" ) try: - self.node.start() + self.node.slow_start() except ProbackupException as e: self.fail( "ERROR: Instance not started after restore. \n {0} \n {1}".format( @@ -213,11 +211,12 @@ def test_restore_from_fullbackup_to_new_location(self): self.node.cleanup() shutil.rmtree(self.get_tblspace_path(self.node, tblspace_name)) - self.node_new = self.make_simple_node(base_dir="{0}/{1}/node_new_location".format(module_name, self.fname)) - self.node_new.cleanup() + node_new = self.make_simple_node(base_dir="{0}/{1}/node_new_location".format(module_name, self.fname)) + node_new.cleanup() try: - self.restore_node(self.backup_dir, 'node', self.node_new, backup_id=self.backup_id) + self.restore_node(self.backup_dir, 'node', node_new, backup_id=self.backup_id) + self.set_auto_conf(node_new, {'port': node_new.port}) except ProbackupException as e: self.fail( "ERROR: Restore from full backup failed. \n {0} \n {1}".format( @@ -230,7 +229,7 @@ def test_restore_from_fullbackup_to_new_location(self): "ERROR: File pg_compression not found in backup dir" ) try: - self.node_new.start() + node_new.slow_start() except ProbackupException as e: self.fail( "ERROR: Instance not started after restore. 
\n {0} \n {1}".format( @@ -240,10 +239,10 @@ def test_restore_from_fullbackup_to_new_location(self): ) self.assertEqual( - repr(self.node.safe_psql("postgres", "SELECT * FROM %s" % 't1')), + repr(node_new.safe_psql("postgres", "SELECT * FROM %s" % 't1')), repr(self.table_t1) ) - self.node_new.cleanup() + node_new.cleanup() # @unittest.expectedFailure # @unittest.skip("skip") @@ -255,11 +254,12 @@ def test_restore_from_fullbackup_to_new_location_5_jobs(self): self.node.cleanup() shutil.rmtree(self.get_tblspace_path(self.node, tblspace_name)) - self.node_new = self.make_simple_node(base_dir="{0}/{1}/node_new_location".format(module_name, self.fname)) - self.node_new.cleanup() + node_new = self.make_simple_node(base_dir="{0}/{1}/node_new_location".format(module_name, self.fname)) + node_new.cleanup() try: - self.restore_node(self.backup_dir, 'node', self.node_new, backup_id=self.backup_id, options=['-j', '5']) + self.restore_node(self.backup_dir, 'node', node_new, backup_id=self.backup_id, options=['-j', '5']) + self.set_auto_conf(node_new, {'port': node_new.port}) except ProbackupException as e: self.fail( "ERROR: Restore from full backup failed. \n {0} \n {1}".format( @@ -272,7 +272,7 @@ def test_restore_from_fullbackup_to_new_location_5_jobs(self): "ERROR: File pg_compression not found in backup dir" ) try: - self.node_new.start() + node_new.slow_start() except ProbackupException as e: self.fail( "ERROR: Instance not started after restore. \n {0} \n {1}".format( @@ -282,10 +282,10 @@ def test_restore_from_fullbackup_to_new_location_5_jobs(self): ) self.assertEqual( - repr(self.node.safe_psql("postgres", "SELECT * FROM %s" % 't1')), + repr(node_new.safe_psql("postgres", "SELECT * FROM %s" % 't1')), repr(self.table_t1) ) - self.node_new.cleanup() + node_new.cleanup() # @unittest.expectedFailure # @unittest.skip("skip") @@ -319,7 +319,7 @@ def test_restore_from_fullbackup_to_old_location_tablespace_new_location(self): "ERROR: File pg_compression not found in new tablespace location" ) try: - self.node.start() + self.node.slow_start() except ProbackupException as e: self.fail( "ERROR: Instance not started after restore. \n {0} \n {1}".format( @@ -365,7 +365,7 @@ def test_restore_from_fullbackup_to_old_location_tablespace_new_location_3_jobs( "ERROR: File pg_compression not found in new tablespace location" ) try: - self.node.start() + self.node.slow_start() except ProbackupException as e: self.fail( "ERROR: Instance not started after restore. 
\n {0} \n {1}".format( diff --git a/tests/checkdb.py b/tests/checkdb.py new file mode 100644 index 000000000..6c25293ab --- /dev/null +++ b/tests/checkdb.py @@ -0,0 +1,660 @@ +import os +import unittest +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from datetime import datetime, timedelta +import subprocess +from testgres import QueryException +import shutil +import sys +import time + + +module_name = 'checkdb' + + +class CheckdbTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + def test_checkdb_amcheck_only_sanity(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir="{0}/{1}/node".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select i" + " as id from generate_series(0,100) i") + + node.safe_psql( + "postgres", + "create index on t_heap(id)") + + try: + node.safe_psql( + "postgres", + "create extension amcheck") + except QueryException as e: + node.safe_psql( + "postgres", + "create extension amcheck_next") + + log_file_path = os.path.join( + backup_dir, 'log', 'pg_probackup.log') + + # simple sanity + try: + self.checkdb_node( + options=['--skip-block-validation']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because --amcheck option is missing\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Option '--skip-block-validation' must be " + "used with '--amcheck' option", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # simple sanity + output = self.checkdb_node( + options=[ + '--amcheck', + '--skip-block-validation', + '-d', 'postgres', '-p', str(node.port)]) + + self.assertIn( + 'INFO: checkdb --amcheck finished successfully', + output) + self.assertIn( + 'All checked indexes are valid', + output) + + # logging to file sanity + try: + self.checkdb_node( + options=[ + '--amcheck', + '--skip-block-validation', + '--log-level-file=verbose', + '-d', 'postgres', '-p', str(node.port)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because log_directory missing\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Cannot save checkdb logs to a file. 
" + "You must specify --log-directory option when " + "running checkdb with --log-level-file option enabled", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # If backup_dir provided, then instance name must be + # provided too + try: + self.checkdb_node( + backup_dir, + options=[ + '--amcheck', + '--skip-block-validation', + '--log-level-file=verbose', + '-d', 'postgres', '-p', str(node.port)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because log_directory missing\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: required parameter not specified: --instance", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # checkdb can use default or set in config values, + # if backup_dir and instance name are provided + self.checkdb_node( + backup_dir, + 'node', + options=[ + '--amcheck', + '--skip-block-validation', + '--log-level-file=verbose', + '-d', 'postgres', '-p', str(node.port)]) + + # check that file present and full of messages + os.path.isfile(log_file_path) + with open(log_file_path) as f: + log_file_content = f.read() + self.assertIn( + 'INFO: checkdb --amcheck finished successfully', + log_file_content) + self.assertIn( + 'VERBOSE: (query)', + log_file_content) + os.unlink(log_file_path) + + # log-level-file and log-directory are provided + self.checkdb_node( + backup_dir, + 'node', + options=[ + '--amcheck', + '--skip-block-validation', + '--log-level-file=verbose', + '--log-directory={0}'.format( + os.path.join(backup_dir, 'log')), + '-d', 'postgres', '-p', str(node.port)]) + + # check that file present and full of messages + os.path.isfile(log_file_path) + with open(log_file_path) as f: + log_file_content = f.read() + self.assertIn( + 'INFO: checkdb --amcheck finished successfully', + log_file_content) + self.assertIn( + 'VERBOSE: (query)', + log_file_content) + os.unlink(log_file_path) + + gdb = self.checkdb_node( + gdb=True, + options=[ + '--amcheck', + '--skip-block-validation', + '--log-level-file=verbose', + '--log-directory={0}'.format( + os.path.join(backup_dir, 'log')), + '-d', 'postgres', '-p', str(node.port)]) + + gdb.set_breakpoint('amcheck_one_index') + gdb.run_until_break() + + node.safe_psql( + "postgres", + "drop table t_heap") + + gdb.remove_all_breakpoints() + + gdb.continue_execution_until_exit() + + # check that message about missing index is present + with open(log_file_path) as f: + log_file_content = f.read() + self.assertIn( + 'ERROR: checkdb --amcheck finished with failure', + log_file_content) + self.assertIn( + "WARNING: Thread [1]. 
Amcheck failed in database 'postgres' " + "for index: 'public.t_heap_id_idx':", + log_file_content) + self.assertIn( + 'ERROR: could not open relation with OID', + log_file_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_basic_checkdb_amcheck_only_sanity(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir="{0}/{1}/node".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # create two databases + node.safe_psql("postgres", "create database db1") + try: + node.safe_psql( + "db1", + "create extension amcheck") + except QueryException as e: + node.safe_psql( + "db1", + "create extension amcheck_next") + + node.safe_psql("postgres", "create database db2") + try: + node.safe_psql( + "db2", + "create extension amcheck") + except QueryException as e: + node.safe_psql( + "db2", + "create extension amcheck_next") + + # init pgbench in two databases and corrupt both indexes + node.pgbench_init(scale=5, dbname='db1') + node.pgbench_init(scale=5, dbname='db2') + + node.safe_psql( + "db2", + "alter index pgbench_accounts_pkey rename to some_index") + + index_path_1 = os.path.join( + node.data_dir, + node.safe_psql( + "db1", + "select pg_relation_filepath('pgbench_accounts_pkey')").rstrip()) + + index_path_2 = os.path.join( + node.data_dir, + node.safe_psql( + "db2", + "select pg_relation_filepath('some_index')").rstrip()) + + try: + self.checkdb_node( + options=[ + '--amcheck', + '--skip-block-validation', + '-d', 'postgres', '-p', str(node.port)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because some db was not amchecked" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Some databases were not amchecked", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + node.stop() + + # Let`s do index corruption + with open(index_path_1, "rb+", 0) as f: + f.seek(42000) + f.write(b"blablahblahs") + f.flush() + f.close + + with open(index_path_2, "rb+", 0) as f: + f.seek(42000) + f.write(b"blablahblahs") + f.flush() + f.close + + node.slow_start() + + log_file_path = os.path.join( + backup_dir, 'log', 'pg_probackup.log') + + try: + self.checkdb_node( + options=[ + '--amcheck', + '--skip-block-validation', + '--log-level-file=verbose', + '--log-directory={0}'.format( + os.path.join(backup_dir, 'log')), + '-d', 'postgres', '-p', str(node.port)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because some db was not amchecked" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: checkdb --amcheck finished with failure", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # corruption of both indexes in db1 and db2 must be detected + # also the that amcheck is not installed in 'postgres' + # should be logged + with open(log_file_path) as f: + log_file_content = f.read() + self.assertIn( + "WARNING: Thread [1]. 
Amcheck failed in database 'db1' " + "for index: 'public.pgbench_accounts_pkey':", + log_file_content) + + self.assertIn( + "WARNING: Thread [1]. Amcheck failed in database 'db2' " + "for index: 'public.some_index':", + log_file_content) + + self.assertIn( + "ERROR: checkdb --amcheck finished with failure", + log_file_content) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.skip("skip") + def test_checkdb_block_validation_sanity(self): + """make node, corrupt some pages, check that checkdb failed""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1000) i") + node.safe_psql( + "postgres", + "CHECKPOINT;") + + heap_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + # sanity + try: + self.checkdb_node() + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because pgdata must be specified\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: required parameter not specified: PGDATA (-D, --pgdata)", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.checkdb_node( + data_dir=node.data_dir, + options=['-d', 'postgres', '-p', str(node.port)]) + + self.checkdb_node( + backup_dir, 'node', + options=['-d', 'postgres', '-p', str(node.port)]) + + heap_full_path = os.path.join(node.data_dir, heap_path) + + with open(heap_full_path, "rb+", 0) as f: + f.seek(9000) + f.write(b"bla") + f.flush() + f.close + + with open(heap_full_path, "rb+", 0) as f: + f.seek(42000) + f.write(b"bla") + f.flush() + f.close + + try: + self.checkdb_node( + backup_dir, 'node', + options=['-d', 'postgres', '-p', str(node.port)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of data corruption\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Checkdb failed", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Corruption detected in file "{0}", block 1'.format( + os.path.normpath(heap_full_path)), + e.message) + + self.assertIn( + 'WARNING: Corruption detected in file "{0}", block 5'.format( + os.path.normpath(heap_full_path)), + e.message) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_checkdb_sigint_handling(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + try: + node.safe_psql( + "postgres", + "create extension amcheck") + except QueryException as e: + node.safe_psql( + 
"postgres", + "create extension amcheck_next") + + # FULL backup + gdb = self.checkdb_node( + backup_dir, 'node', gdb=True, + options=[ + '-d', 'postgres', '-j', '4', + '--skip-block-validation', + '--amcheck', '-p', str(node.port)]) + + gdb.set_breakpoint('amcheck_one_index') + gdb.run_until_break() + + gdb.continue_execution_until_break(10) + gdb.remove_all_breakpoints() + + gdb._execute('signal SIGINT') + gdb.continue_execution_until_error() + + with open(node.pg_log_file, 'r') as f: + output = f.read() + + self.assertNotIn('could not receive data from client', output) + self.assertNotIn('could not send data to client', output) + self.assertNotIn('connection to client lost', output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_checkdb_with_least_privileges(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + 'postgres', + 'CREATE DATABASE backupdb') + + try: + node.safe_psql( + "backupdb", + "create extension amcheck") + except QueryException as e: + node.safe_psql( + "backupdb", + "create extension amcheck_next") + + node.safe_psql( + 'backupdb', + "REVOKE ALL ON DATABASE backupdb from PUBLIC; " + "REVOKE ALL ON SCHEMA public from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON SCHEMA information_schema from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC;") + + # PG 9.5 + if self.get_version(node) < 90600: + node.safe_psql( + 'backupdb', + 'CREATE ROLE backup WITH LOGIN; ' + 'GRANT CONNECT ON DATABASE backupdb to backup; ' + 'GRANT USAGE ON SCHEMA pg_catalog TO backup; ' + 'GRANT USAGE ON SCHEMA public TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_am TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_class TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_index TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_namespace TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.namene(name, name) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.int8(integer) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.oideq(oid, oid) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.charne("char", "char") TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_system() TO backup; ' + 'GRANT EXECUTE ON FUNCTION 
bt_index_check(regclass) TO backup; ' + 'GRANT EXECUTE ON FUNCTION bt_index_check(regclass, bool) TO backup;' + ) + # PG 9.6 + elif self.get_version(node) > 90600 and self.get_version(node) < 100000: + node.safe_psql( + 'backupdb', + 'CREATE ROLE backup WITH LOGIN; ' + 'GRANT CONNECT ON DATABASE backupdb to backup; ' + 'GRANT USAGE ON SCHEMA pg_catalog TO backup; ' + 'GRANT USAGE ON SCHEMA public TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_am TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_class TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_index TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_namespace TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.namene(name, name) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.int8(integer) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.oideq(oid, oid) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.charne("char", "char") TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_system() TO backup; ' + 'GRANT EXECUTE ON FUNCTION bt_index_check(regclass) TO backup; ' + 'GRANT EXECUTE ON FUNCTION bt_index_check(regclass, bool) TO backup;' + ) + # >= 10 + else: + node.safe_psql( + 'backupdb', + 'CREATE ROLE backup WITH LOGIN; ' + 'GRANT CONNECT ON DATABASE backupdb to backup; ' + 'GRANT USAGE ON SCHEMA pg_catalog TO backup; ' + 'GRANT USAGE ON SCHEMA public TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_am TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_class TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_index TO backup; ' + 'GRANT SELECT ON TABLE pg_catalog.pg_namespace TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.namene(name, name) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.int8(integer) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.oideq(oid, oid) TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.charne("char", "char") TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; ' + 'GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_system() TO backup; ' + 'GRANT EXECUTE ON FUNCTION bt_index_check(regclass) TO backup; ' + 'GRANT EXECUTE ON FUNCTION bt_index_check(regclass, bool) TO backup;' + ) + +# if ProbackupTest.enterprise: +# node.safe_psql( +# "backupdb", +# "GRANT EXECUTE ON FUNCTION pg_catalog.pgpro_edition() TO backup") +# +# node.safe_psql( +# "backupdb", +# "GRANT EXECUTE ON FUNCTION pg_catalog.pgpro_version() TO backup") + + # checkdb + try: + self.checkdb_node( + backup_dir, 'node', + options=[ + '--amcheck', '-U', 'backup', + '-d', 'backupdb', '-p', str(node.port)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because permissions are missing\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as 
e: + self.assertIn( + "INFO: Amcheck succeeded for database 'backupdb'", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertIn( + "WARNING: Extension 'amcheck' or 'amcheck_next' are " + "not installed in database postgres", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertIn( + "ERROR: Some databases were not amchecked", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) diff --git a/tests/compatibility.py b/tests/compatibility.py new file mode 100644 index 000000000..d2db2be28 --- /dev/null +++ b/tests/compatibility.py @@ -0,0 +1,1336 @@ +import unittest +import subprocess +import os +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from sys import exit +import shutil + +module_name = 'compatibility' + + +class CompatibilityTest(ProbackupTest, unittest.TestCase): + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_backward_compatibility_page(self): + """Description in jira issue PGPRO-434""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.show_pb(backup_dir) + + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.show_pb(backup_dir) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.pgbench_init(scale=10) + + # FULL backup with old binary + self.backup_node( + backup_dir, 'node', node, old_binary=True) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + self.show_pb(backup_dir) + + self.validate_pb(backup_dir) + + # RESTORE old FULL with new binary + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Page BACKUP with old binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "20"] + ) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, backup_type='page', + old_binary=True) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Page BACKUP with new binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "20"]) + + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, backup_type='page') + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + node.safe_psql( + 'postgres', + 'create table tmp as select * from 
pgbench_accounts where aid < 1000') + + node.safe_psql( + 'postgres', + 'delete from pgbench_accounts') + + node.safe_psql( + 'postgres', + 'VACUUM') + + self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + node.safe_psql( + 'postgres', + 'insert into pgbench_accounts select * from pgbench_accounts') + + self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_backward_compatibility_delta(self): + """Description in jira issue PGPRO-434""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.show_pb(backup_dir) + + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.show_pb(backup_dir) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.pgbench_init(scale=10) + + # FULL backup with old binary + self.backup_node( + backup_dir, 'node', node, old_binary=True) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + self.show_pb(backup_dir) + + self.validate_pb(backup_dir) + + # RESTORE old FULL with new binary + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Delta BACKUP with old binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "20"] + ) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, backup_type='delta', + old_binary=True) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Delta BACKUP with new binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "20"] + ) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', node, backup_type='delta') + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + node.safe_psql( + 'postgres', + 'create table tmp as select * 
from pgbench_accounts where aid < 1000') + + node.safe_psql( + 'postgres', + 'delete from pgbench_accounts') + + node.safe_psql( + 'postgres', + 'VACUUM') + + self.backup_node(backup_dir, 'node', node, backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + node.safe_psql( + 'postgres', + 'insert into pgbench_accounts select * from pgbench_accounts') + + self.backup_node(backup_dir, 'node', node, backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_backward_compatibility_ptrack(self): + """Description in jira issue PGPRO-434""" + + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.show_pb(backup_dir) + + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.show_pb(backup_dir) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + node.pgbench_init(scale=10) + + # FULL backup with old binary + self.backup_node( + backup_dir, 'node', node, old_binary=True) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + self.show_pb(backup_dir) + + self.validate_pb(backup_dir) + + # RESTORE old FULL with new binary + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # ptrack BACKUP with old binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "20"] + ) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, backup_type='ptrack', + old_binary=True) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--recovery-target=latest", + "--recovery-target-action=promote"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Ptrack BACKUP with new binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "20"] + ) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, backup_type='ptrack') + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + 
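# restore the ptrack backup taken with the new binary and compare the restored data directory with the source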
node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--recovery-target=latest", + "--recovery-target-action=promote"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_backward_compatibility_compression(self): + """Description in jira issue PGPRO-434""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.pgbench_init(scale=10) + + # FULL backup with OLD binary + backup_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + # restore OLD FULL with new binary + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # PAGE backup with OLD binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='page', + old_binary=True, + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # PAGE backup with new binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='page', + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Delta backup with old binary + self.delete_pb(backup_dir, 'node', backup_id) + + self.backup_node( + backup_dir, 'node', node, + old_binary=True, + options=['--compress']) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=['--compress'], + old_binary=True) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = 
self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Delta backup with new binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_backward_compatibility_merge(self): + """ + Create node, take FULL and PAGE backups with old binary, + merge them with new binary + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + # FULL backup with OLD binary + self.backup_node( + backup_dir, 'node', node, + old_binary=True) + + node.pgbench_init(scale=1) + + # PAGE backup with OLD binary + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type='page', old_binary=True) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + self.merge_backup(backup_dir, "node", backup_id) + + self.show_pb(backup_dir, as_text=True, as_json=False) + + # restore OLD FULL with new binary + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_backward_compatibility_merge_1(self): + """ + Create node, take FULL and PAGE backups with old binary, + merge them with new binary. 
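+        In-place merge is expected to be disabled because of the program versions mismatch.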
+ old binary version =< 2.2.7 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.pgbench_init(scale=20) + + # FULL backup with OLD binary + self.backup_node(backup_dir, 'node', node, old_binary=True) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "1", "-T", "10", "--no-vacuum"]) + pgbench.wait() + pgbench.stdout.close() + + # PAGE1 backup with OLD binary + self.backup_node( + backup_dir, 'node', node, backup_type='page', old_binary=True) + + node.safe_psql( + 'postgres', + 'DELETE from pgbench_accounts') + + node.safe_psql( + 'postgres', + 'VACUUM pgbench_accounts') + + # PAGE2 backup with OLD binary + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='page', old_binary=True) + + pgdata = self.pgdata_content(node.data_dir) + + # merge chain created by old binary with new binary + output = self.merge_backup(backup_dir, "node", backup_id) + + # check that in-place is disabled + self.assertIn( + "WARNING: In-place merge is disabled " + "because of program versions mismatch", output) + + # restore merged backup + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node(backup_dir, 'node', node_restored) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_backward_compatibility_merge_2(self): + """ + Create node, take FULL and PAGE backups with old binary, + merge them with new binary. 
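+        Merge the incremental backups one by one with the new binary and check data correctness after every merge.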
+ old binary version =< 2.2.7 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.pgbench_init(scale=50) + + node.safe_psql( + 'postgres', + 'VACUUM pgbench_accounts') + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + # FULL backup with OLD binary + self.backup_node(backup_dir, 'node', node, old_binary=True) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "1", "-T", "10", "--no-vacuum"]) + pgbench.wait() + pgbench.stdout.close() + + # PAGE1 backup with OLD binary + page1 = self.backup_node( + backup_dir, 'node', node, + backup_type='page', old_binary=True) + + pgdata1 = self.pgdata_content(node.data_dir) + + node.safe_psql( + 'postgres', + "DELETE from pgbench_accounts where ctid > '(10,1)'") + + # PAGE2 backup with OLD binary + page2 = self.backup_node( + backup_dir, 'node', node, + backup_type='page', old_binary=True) + + pgdata2 = self.pgdata_content(node.data_dir) + + # PAGE3 backup with OLD binary + page3 = self.backup_node( + backup_dir, 'node', node, + backup_type='page', old_binary=True) + + pgdata3 = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "1", "-T", "10", "--no-vacuum"]) + pgbench.wait() + pgbench.stdout.close() + + # PAGE4 backup with NEW binary + page4 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + pgdata4 = self.pgdata_content(node.data_dir) + + # merge backups one by one and check data correctness + # merge PAGE1 + self.merge_backup( + backup_dir, "node", page1, options=['--log-level-file=VERBOSE']) + + # check data correctness for PAGE1 + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, backup_id=page1, + options=['--log-level-file=VERBOSE']) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata1, pgdata_restored) + + # merge PAGE2 + self.merge_backup(backup_dir, "node", page2) + + # check data correctness for PAGE2 + node_restored.cleanup() + self.restore_node(backup_dir, 'node', node_restored, backup_id=page2) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata2, pgdata_restored) + + # merge PAGE3 + self.show_pb(backup_dir, 'node', page3) + self.merge_backup(backup_dir, "node", page3) + self.show_pb(backup_dir, 'node', page3) + + # check data correctness for PAGE3 + node_restored.cleanup() + self.restore_node(backup_dir, 'node', node_restored, backup_id=page3) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata3, pgdata_restored) + + # merge PAGE4 + self.merge_backup(backup_dir, "node", page4) + + # check data correctness for PAGE4 + node_restored.cleanup() + self.restore_node(backup_dir, 'node', node_restored, backup_id=page4) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata4, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # 
@unittest.skip("skip") + def test_backward_compatibility_merge_3(self): + """ + Create node, take FULL and PAGE backups with old binary, + merge them with new binary. + old binary version =< 2.2.7 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.pgbench_init(scale=50) + + node.safe_psql( + 'postgres', + 'VACUUM pgbench_accounts') + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + # FULL backup with OLD binary + self.backup_node( + backup_dir, 'node', node, old_binary=True, options=['--compress']) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "1", "-T", "10", "--no-vacuum"]) + pgbench.wait() + pgbench.stdout.close() + + # PAGE1 backup with OLD binary + page1 = self.backup_node( + backup_dir, 'node', node, + backup_type='page', old_binary=True, options=['--compress']) + + pgdata1 = self.pgdata_content(node.data_dir) + + node.safe_psql( + 'postgres', + "DELETE from pgbench_accounts where ctid > '(10,1)'") + + # PAGE2 backup with OLD binary + page2 = self.backup_node( + backup_dir, 'node', node, + backup_type='page', old_binary=True, options=['--compress']) + + pgdata2 = self.pgdata_content(node.data_dir) + + # PAGE3 backup with OLD binary + page3 = self.backup_node( + backup_dir, 'node', node, + backup_type='page', old_binary=True, options=['--compress']) + + pgdata3 = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "1", "-T", "10", "--no-vacuum"]) + pgbench.wait() + pgbench.stdout.close() + + # PAGE4 backup with NEW binary + page4 = self.backup_node( + backup_dir, 'node', node, backup_type='page', options=['--compress']) + pgdata4 = self.pgdata_content(node.data_dir) + + # merge backups one by one and check data correctness + # merge PAGE1 + self.merge_backup( + backup_dir, "node", page1, options=['--log-level-file=VERBOSE']) + + # check data correctness for PAGE1 + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, backup_id=page1, + options=['--log-level-file=VERBOSE']) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata1, pgdata_restored) + + # merge PAGE2 + self.merge_backup(backup_dir, "node", page2) + + # check data correctness for PAGE2 + node_restored.cleanup() + self.restore_node(backup_dir, 'node', node_restored, backup_id=page2) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata2, pgdata_restored) + + # merge PAGE3 + self.show_pb(backup_dir, 'node', page3) + self.merge_backup(backup_dir, "node", page3) + self.show_pb(backup_dir, 'node', page3) + + # check data correctness for PAGE3 + node_restored.cleanup() + self.restore_node(backup_dir, 'node', node_restored, backup_id=page3) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata3, pgdata_restored) + + # merge PAGE4 + self.merge_backup(backup_dir, "node", page4) + + # check data correctness for PAGE4 + node_restored.cleanup() + 
self.restore_node(backup_dir, 'node', node_restored, backup_id=page4) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata4, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_backward_compatibility_merge_4(self): + """ + Start merge between minor version, crash and retry it. + old binary version =< 2.4.0 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.pgbench_init(scale=20) + + node.safe_psql( + 'postgres', + 'VACUUM pgbench_accounts') + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + # FULL backup with OLD binary + self.backup_node( + backup_dir, 'node', node, old_binary=True, options=['--compress']) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "1", "-T", "20", "--no-vacuum"]) + pgbench.wait() + pgbench.stdout.close() + + # PAGE backup with NEW binary + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page', options=['--compress']) + pgdata = self.pgdata_content(node.data_dir) + + # merge PAGE4 + gdb = self.merge_backup(backup_dir, "node", page_id, gdb=True) + + gdb.set_breakpoint('rename') + gdb.run_until_break() + gdb.continue_execution_until_break(500) + gdb._execute('signal SIGKILL') + + try: + self.merge_backup(backup_dir, "node", page_id) + self.assertEqual( + 1, 0, + "Expecting Error because of format changes.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Retry of failed merge for backups with different " + "between minor versions is forbidden to avoid data corruption " + "because of storage format changes introduced in 2.4.0 version, " + "please take a new full backup", + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_page_vacuum_truncate(self): + """ + make node, create table, take full backup, + delete all data, vacuum relation, + take page backup, insert some data, + take second page backup, + restore latest page backup using new binary + and check data correctness + old binary should be 2.2.x version + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.safe_psql( + "postgres", + "create sequence t_seq; " + "create table t_heap as select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1024) i") + + node.safe_psql( + 
"postgres", + "vacuum t_heap") + + id1 = self.backup_node(backup_dir, 'node', node, old_binary=True) + pgdata1 = self.pgdata_content(node.data_dir) + + node.safe_psql( + "postgres", + "delete from t_heap") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + id2 = self.backup_node( + backup_dir, 'node', node, backup_type='page', old_binary=True) + pgdata2 = self.pgdata_content(node.data_dir) + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1) i") + + id3 = self.backup_node( + backup_dir, 'node', node, backup_type='page', old_binary=True) + pgdata3 = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + data_dir=node_restored.data_dir, backup_id=id1) + + # Physical comparison + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata1, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + data_dir=node_restored.data_dir, backup_id=id2) + + # Physical comparison + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata2, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + data_dir=node_restored.data_dir, backup_id=id3) + + # Physical comparison + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata3, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + node_restored.cleanup() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_page_vacuum_truncate_compression(self): + """ + make node, create table, take full backup, + delete all data, vacuum relation, + take page backup, insert some data, + take second page backup, + restore latest page backup using new binary + and check data correctness + old binary should be 2.2.x version + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.safe_psql( + "postgres", + "create sequence t_seq; " + "create table t_heap as select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1024) i") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + self.backup_node( + backup_dir, 'node',node, old_binary=True, options=['--compress']) + + node.safe_psql( + "postgres", + "delete from t_heap") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + self.backup_node( + backup_dir, 'node', node, backup_type='page', + old_binary=True, options=['--compress']) + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, " + "md5(i::text) as text, 
" + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1) i") + + self.backup_node( + backup_dir, 'node', node, backup_type='page', + old_binary=True, options=['--compress']) + + pgdata = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node(backup_dir, 'node', node_restored) + + # Physical comparison + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_page_vacuum_truncate_compressed_1(self): + """ + make node, create table, take full backup, + delete all data, vacuum relation, + take page backup, insert some data, + take second page backup, + restore latest page backup using new binary + and check data correctness + old binary should be 2.2.x version + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.safe_psql( + "postgres", + "create sequence t_seq; " + "create table t_heap as select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1024) i") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + id1 = self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=['--compress']) + pgdata1 = self.pgdata_content(node.data_dir) + + node.safe_psql( + "postgres", + "delete from t_heap") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + id2 = self.backup_node( + backup_dir, 'node', node, backup_type='page', + old_binary=True, options=['--compress']) + pgdata2 = self.pgdata_content(node.data_dir) + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1) i") + + id3 = self.backup_node( + backup_dir, 'node', node, backup_type='page', + old_binary=True, options=['--compress']) + pgdata3 = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + data_dir=node_restored.data_dir, backup_id=id1) + + # Physical comparison + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata1, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + data_dir=node_restored.data_dir, backup_id=id2) + + # Physical comparison + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata2, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + 
data_dir=node_restored.data_dir, backup_id=id3) + + # Physical comparison + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata3, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + node_restored.cleanup() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_hidden_files(self): + """ + old_version should be < 2.3.0 + Create hidden file in pgdata, take backup + with old binary, then try to delete backup + with new binary + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + open(os.path.join(node.data_dir, ".hidden_stuff"), 'a').close() + + backup_id = self.backup_node( + backup_dir, 'node',node, old_binary=True, options=['--stream']) + + self.delete_pb(backup_dir, 'node', backup_id) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/compression.py b/tests/compression.py index aa2753821..321461d6e 100644 --- a/tests/compression.py +++ b/tests/compression.py @@ -12,25 +12,23 @@ class CompressionTest(ProbackupTest, unittest.TestCase): # @unittest.skip("skip") # @unittest.expectedFailure - def test_compression_stream_zlib(self): - """make archive node, make full and page stream backups, check data correctness in restored instance""" + def test_basic_compression_stream_zlib(self): + """ + make archive node, make full and page stream backups, + check data correctness in restored instance + """ self.maxDiff = None fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on'} - ) + initdb_params=['--data-checksums']) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -55,19 +53,17 @@ def test_compression_stream_zlib(self): page_backup_id = self.backup_node( backup_dir, 'node', node, backup_type='page', options=[ - '--stream', '--compress-algorithm=zlib', - '--log-level-console=verbose', - '--log-level-file=verbose']) + '--stream', '--compress-algorithm=zlib']) - # PTRACK BACKUP + # DELTA BACKUP node.safe_psql( "postgres", "insert into t_heap select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " "from generate_series(512,768) i") - ptrack_result = node.execute("postgres", "SELECT * FROM t_heap") - ptrack_backup_id = self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', + delta_result = node.execute("postgres", "SELECT * FROM t_heap") + delta_backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', options=['--stream', '--compress-algorithm=zlib']) # Drop Node @@ -105,11 +101,11 @@ def test_compression_stream_zlib(self): self.assertEqual(page_result, page_result_new) node.cleanup() - # Check ptrack backup + # Check 
delta backup self.assertIn( - "INFO: Restore of backup {0} completed.".format(ptrack_backup_id), + "INFO: Restore of backup {0} completed.".format(delta_backup_id), self.restore_node( - backup_dir, 'node', node, backup_id=ptrack_backup_id, + backup_dir, 'node', node, backup_id=delta_backup_id, options=[ "-j", "4", "--immediate", "--recovery-target-action=promote"]), @@ -117,12 +113,11 @@ def test_compression_stream_zlib(self): repr(self.output), self.cmd)) node.slow_start() - ptrack_result_new = node.execute("postgres", "SELECT * FROM t_heap") - self.assertEqual(ptrack_result, ptrack_result_new) - node.cleanup() + delta_result_new = node.execute("postgres", "SELECT * FROM t_heap") + self.assertEqual(delta_result, delta_result_new) # Clean after yourself - self.del_test_dir(module_name, fname) + self.del_test_dir(module_name, fname, [node]) def test_compression_archive_zlib(self): """ @@ -133,18 +128,14 @@ def test_compression_archive_zlib(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on'} - ) + initdb_params=['--data-checksums']) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -167,14 +158,14 @@ def test_compression_archive_zlib(self): backup_dir, 'node', node, backup_type='page', options=["--compress-algorithm=zlib"]) - # PTRACK BACKUP + # DELTA BACKUP node.safe_psql( "postgres", "insert into t_heap select i as id, md5(i::text) as text, " "md5(i::text)::tsvector as tsvector from generate_series(0,3) i") - ptrack_result = node.execute("postgres", "SELECT * FROM t_heap") - ptrack_backup_id = self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', + delta_result = node.execute("postgres", "SELECT * FROM t_heap") + delta_backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', options=['--compress-algorithm=zlib']) # Drop Node @@ -212,11 +203,11 @@ def test_compression_archive_zlib(self): self.assertEqual(page_result, page_result_new) node.cleanup() - # Check ptrack backup + # Check delta backup self.assertIn( - "INFO: Restore of backup {0} completed.".format(ptrack_backup_id), + "INFO: Restore of backup {0} completed.".format(delta_backup_id), self.restore_node( - backup_dir, 'node', node, backup_id=ptrack_backup_id, + backup_dir, 'node', node, backup_id=delta_backup_id, options=[ "-j", "4", "--immediate", "--recovery-target-action=promote"]), @@ -224,8 +215,8 @@ def test_compression_archive_zlib(self): repr(self.output), self.cmd)) node.slow_start() - ptrack_result_new = node.execute("postgres", "SELECT * FROM t_heap") - self.assertEqual(ptrack_result, ptrack_result_new) + delta_result_new = node.execute("postgres", "SELECT * FROM t_heap") + self.assertEqual(delta_result, delta_result_new) node.cleanup() # Clean after yourself @@ -240,19 +231,14 @@ def test_compression_stream_pglz(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 
'max_wal_senders': '2', - 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on'} - ) + initdb_params=['--data-checksums']) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -276,15 +262,15 @@ def test_compression_stream_pglz(self): backup_dir, 'node', node, backup_type='page', options=['--stream', '--compress-algorithm=pglz']) - # PTRACK BACKUP + # DELTA BACKUP node.safe_psql( "postgres", "insert into t_heap select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " "from generate_series(512,768) i") - ptrack_result = node.execute("postgres", "SELECT * FROM t_heap") - ptrack_backup_id = self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', + delta_result = node.execute("postgres", "SELECT * FROM t_heap") + delta_backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', options=['--stream', '--compress-algorithm=pglz']) # Drop Node @@ -322,11 +308,11 @@ def test_compression_stream_pglz(self): self.assertEqual(page_result, page_result_new) node.cleanup() - # Check ptrack backup + # Check delta backup self.assertIn( - "INFO: Restore of backup {0} completed.".format(ptrack_backup_id), + "INFO: Restore of backup {0} completed.".format(delta_backup_id), self.restore_node( - backup_dir, 'node', node, backup_id=ptrack_backup_id, + backup_dir, 'node', node, backup_id=delta_backup_id, options=[ "-j", "4", "--immediate", "--recovery-target-action=promote"]), @@ -334,8 +320,8 @@ def test_compression_stream_pglz(self): repr(self.output), self.cmd)) node.slow_start() - ptrack_result_new = node.execute("postgres", "SELECT * FROM t_heap") - self.assertEqual(ptrack_result, ptrack_result_new) + delta_result_new = node.execute("postgres", "SELECT * FROM t_heap") + self.assertEqual(delta_result, delta_result_new) node.cleanup() # Clean after yourself @@ -350,19 +336,14 @@ def test_compression_archive_pglz(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on'} - ) + initdb_params=['--data-checksums']) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -386,15 +367,15 @@ def test_compression_archive_pglz(self): backup_dir, 'node', node, backup_type='page', options=['--compress-algorithm=pglz']) - # PTRACK BACKUP + # DELTA BACKUP node.safe_psql( "postgres", "insert into t_heap select i as id, md5(i::text) as text, " "md5(i::text)::tsvector as tsvector " "from generate_series(200,300) i") - ptrack_result = node.execute("postgres", "SELECT * FROM t_heap") - ptrack_backup_id = self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', + delta_result = node.execute("postgres", "SELECT * FROM t_heap") + delta_backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', options=['--compress-algorithm=pglz']) # Drop Node @@ -432,11 +413,11 @@ def test_compression_archive_pglz(self): self.assertEqual(page_result, page_result_new) node.cleanup() - # Check ptrack backup + # Check delta backup self.assertIn( - "INFO: Restore of 
backup {0} completed.".format(ptrack_backup_id), + "INFO: Restore of backup {0} completed.".format(delta_backup_id), self.restore_node( - backup_dir, 'node', node, backup_id=ptrack_backup_id, + backup_dir, 'node', node, backup_id=delta_backup_id, options=[ "-j", "4", "--immediate", "--recovery-target-action=promote"]), @@ -444,8 +425,8 @@ def test_compression_archive_pglz(self): repr(self.output), self.cmd)) node.slow_start() - ptrack_result_new = node.execute("postgres", "SELECT * FROM t_heap") - self.assertEqual(ptrack_result, ptrack_result_new) + delta_result_new = node.execute("postgres", "SELECT * FROM t_heap") + self.assertEqual(delta_result, delta_result_new) node.cleanup() # Clean after yourself @@ -460,20 +441,14 @@ def test_compression_wrong_algorithm(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on'} - ) + initdb_params=['--data-checksums']) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() try: self.backup_node( @@ -494,3 +469,54 @@ def test_compression_wrong_algorithm(self): # Clean after yourself self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_incompressible_pages(self): + """ + make archive node, create table with incompressible toast pages, + take backup with compression, make sure that page was not compressed, + restore backup and check data correctness + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Full + self.backup_node( + backup_dir, 'node', node, + options=[ + '--compress-algorithm=zlib', + '--compress-level=0']) + + node.pgbench_init(scale=3) + + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=[ + '--compress-algorithm=zlib', + '--compress-level=0']) + + pgdata = self.pgdata_content(node.data_dir) + + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + + # Physical comparison + if self.paranoia: + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + node.slow_start() + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/config.py b/tests/config.py new file mode 100644 index 000000000..b41382204 --- /dev/null +++ b/tests/config.py @@ -0,0 +1,117 @@ +import unittest +import subprocess +import os +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from sys import exit +from shutil import copyfile + +module_name = 'config' + + +class ConfigTest(ProbackupTest, unittest.TestCase): + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_remove_instance_config(self): + """remove pg_probackup.conf""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + 
base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.show_pb(backup_dir) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + self.backup_node( + backup_dir, 'node', node, backup_type='page') + + conf_file = os.path.join( + backup_dir, 'backups','node', 'pg_probackup.conf') + + os.unlink(os.path.join(backup_dir, 'backups','node', 'pg_probackup.conf')) + + try: + self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.assertEqual( + 1, 0, + "Expecting Error because pg_probackup.conf is missing. " + ".\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: could not open file "{0}": ' + 'No such file or directory'.format(conf_file), + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_corrupt_backup_content(self): + """corrupt backup_content.control""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + full1_id = self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + 'postgres', + 'create table t1()') + + fulle2_id = self.backup_node(backup_dir, 'node', node) + + fulle1_conf_file = os.path.join( + backup_dir, 'backups','node', full1_id, 'backup_content.control') + + fulle2_conf_file = os.path.join( + backup_dir, 'backups','node', fulle2_id, 'backup_content.control') + + copyfile(fulle2_conf_file, fulle1_conf_file) + + try: + self.validate_pb(backup_dir, 'node') + self.assertEqual( + 1, 0, + "Expecting Error because backup_content.control is corrupted. 
" + ".\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "WARNING: Invalid CRC of backup control file '{0}':".format(fulle1_conf_file), + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertIn( + "WARNING: Failed to get file list for backup {0}".format(full1_id), + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertIn( + "WARNING: Backup {0} file list is corrupted".format(full1_id), + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.show_pb(backup_dir, 'node', full1_id)['status'] + + self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], 'CORRUPT') + self.assertEqual(self.show_pb(backup_dir, 'node')[1]['status'], 'OK') diff --git a/tests/delete.py b/tests/delete.py new file mode 100644 index 000000000..8ebd7d13a --- /dev/null +++ b/tests/delete.py @@ -0,0 +1,875 @@ +import unittest +import os +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +import subprocess +from sys import exit + + +module_name = 'delete' + + +class DeleteTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_delete_full_backups(self): + """delete full backups""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # full backup + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', node) + + show_backups = self.show_pb(backup_dir, 'node') + id_1 = show_backups[0]['id'] + id_2 = show_backups[1]['id'] + id_3 = show_backups[2]['id'] + self.delete_pb(backup_dir, 'node', id_2) + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(show_backups[0]['id'], id_1) + self.assertEqual(show_backups[1]['id'], id_3) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_del_instance_archive(self): + """delete full backups""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # full backup + self.backup_node(backup_dir, 'node', node) + + # full backup + self.backup_node(backup_dir, 'node', node) + + # restore + node.cleanup() + self.restore_node(backup_dir, 'node', node) + node.slow_start() + + # Delete instance + self.del_instance(backup_dir, 'node') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_delete_archive_mix_compress_and_non_compressed_segments(self): + """delete 
full backups""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir="{0}/{1}/node".format(module_name, fname), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving( + backup_dir, 'node', node, compress=False) + node.slow_start() + + # full backup + self.backup_node(backup_dir, 'node', node) + + node.pgbench_init(scale=10) + + # Restart archiving with compression + self.set_archiving(backup_dir, 'node', node, compress=True) + + node.restart() + + # full backup + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + self.backup_node( + backup_dir, 'node', node, + options=[ + '--retention-redundancy=3', + '--delete-expired']) + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + self.backup_node( + backup_dir, 'node', node, + options=[ + '--retention-redundancy=3', + '--delete-expired']) + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + self.backup_node( + backup_dir, 'node', node, + options=[ + '--retention-redundancy=3', + '--delete-expired']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_increment_page(self): + """delete increment and all after him""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # full backup mode + self.backup_node(backup_dir, 'node', node) + # page backup mode + self.backup_node(backup_dir, 'node', node, backup_type="page") + # page backup mode + self.backup_node(backup_dir, 'node', node, backup_type="page") + # full backup mode + self.backup_node(backup_dir, 'node', node) + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 4) + + # delete first page backup + self.delete_pb(backup_dir, 'node', show_backups[1]['id']) + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 2) + + self.assertEqual(show_backups[0]['backup-mode'], "FULL") + self.assertEqual(show_backups[0]['status'], "OK") + self.assertEqual(show_backups[1]['backup-mode'], "FULL") + self.assertEqual(show_backups[1]['status'], "OK") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_increment_ptrack(self): + """delete increment and all after him""" + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + ptrack_enable=self.ptrack, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + 'postgres', + 'CREATE EXTENSION ptrack') + + # full backup mode + self.backup_node(backup_dir, 'node', node) + # ptrack backup mode + self.backup_node(backup_dir, 'node', node, backup_type="ptrack") + # 
ptrack backup mode + self.backup_node(backup_dir, 'node', node, backup_type="ptrack") + # full backup mode + self.backup_node(backup_dir, 'node', node) + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 4) + + # delete first page backup + self.delete_pb(backup_dir, 'node', show_backups[1]['id']) + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 2) + + self.assertEqual(show_backups[0]['backup-mode'], "FULL") + self.assertEqual(show_backups[0]['status'], "OK") + self.assertEqual(show_backups[1]['backup-mode'], "FULL") + self.assertEqual(show_backups[1]['status'], "OK") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_orphaned_wal_segments(self): + """ + make archive node, make three full backups, + delete second backup without --wal option, + then delete orphaned wals via --wal option + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + # first full backup + backup_1_id = self.backup_node(backup_dir, 'node', node) + # second full backup + backup_2_id = self.backup_node(backup_dir, 'node', node) + # third full backup + backup_3_id = self.backup_node(backup_dir, 'node', node) + node.stop() + + # Check wals + wals_dir = os.path.join(backup_dir, 'wal', 'node') + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f))] + original_wal_quantity = len(wals) + + # delete second full backup + self.delete_pb(backup_dir, 'node', backup_2_id) + # check wal quantity + self.validate_pb(backup_dir) + self.assertEqual(self.show_pb(backup_dir, 'node', backup_1_id)['status'], "OK") + self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") + # try to delete wals for second backup + self.delete_pb(backup_dir, 'node', options=['--wal']) + # check wal quantity + self.validate_pb(backup_dir) + self.assertEqual(self.show_pb(backup_dir, 'node', backup_1_id)['status'], "OK") + self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") + + # delete first full backup + self.delete_pb(backup_dir, 'node', backup_1_id) + self.validate_pb(backup_dir) + self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") + + result = self.delete_pb(backup_dir, 'node', options=['--wal']) + # delete useless wals + self.assertTrue('On timeline 1 WAL segments between ' in result + and 'will be removed' in result) + + self.validate_pb(backup_dir) + self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") + + # Check quantity, it should be lower than original + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f))] + self.assertTrue(original_wal_quantity > len(wals), "Number of wals not changed after 'delete --wal' which is illegal") + + # Delete last backup + self.delete_pb(backup_dir, 'node', backup_3_id, options=['--wal']) + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f))] + self.assertEqual (0, len(wals), "Number of 
wals should be equal to 0") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_wal_between_multiple_timelines(self): + """ + /-------B1-- + A1----------------A2---- + + delete A1 backup, check that WAL segments on [A1, A2) and + [A1, B1) are deleted and backups B1 and A2 keep + their WAL + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + A1 = self.backup_node(backup_dir, 'node', node) + + # load some data to node + node.pgbench_init(scale=3) + + node2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node2')) + node2.cleanup() + + self.restore_node(backup_dir, 'node', node2) + self.set_auto_conf(node2, {'port': node2.port}) + node2.slow_start() + + # load some more data to node + node.pgbench_init(scale=3) + + # take A2 + A2 = self.backup_node(backup_dir, 'node', node) + + # load some more data to node2 + node2.pgbench_init(scale=2) + + B1 = self.backup_node( + backup_dir, 'node', + node2, data_dir=node2.data_dir) + + self.delete_pb(backup_dir, 'node', backup_id=A1, options=['--wal']) + + self.validate_pb(backup_dir) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_backup_with_empty_control_file(self): + """ + take backup, truncate its control file, + try to delete it via 'delete' command + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + set_replication=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # full backup mode + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + # page backup mode + self.backup_node( + backup_dir, 'node', node, backup_type="delta", options=['--stream']) + # page backup mode + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", options=['--stream']) + + with open( + os.path.join(backup_dir, 'backups', 'node', backup_id, 'backup.control'), + 'wt') as f: + f.flush() + f.close() + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 3) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_interleaved_incremental_chains(self): + """complicated case of interleaved backup chains""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULLb to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # FULLb ERROR + # FULLa OK + + # Take PAGEa1 backup + page_id_a1 = 
self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change FULLb to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + # Now we start to play with first generation of PAGE backups + # Change PAGEb1 and FULLb status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # Change PAGEa1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEa2 and FULla to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + + # Change PAGEb1 and FULlb to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGEa2 and FULLa status to OK + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGEc1 OK + # FULLc OK + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Delete FULLb + self.delete_pb( + backup_dir, 'node', backup_id_b) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 5) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_multiple_descendants(self): + """ + PAGEb3 + | PAGEa3 + PAGEb2 / + | PAGEa2 / + PAGEb1 \ / + | PAGEa1 + FULLb | + FULLa should be deleted + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULLb to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change FULLb to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 
OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + # Change PAGEa1 to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # Change PAGEb1 and FULLb backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEb1 and FULLb to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa2 and FULLa to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + # Change PAGEb2, PAGEb1 and FULLb to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b2, 'ERROR') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # Change FULLa to OK + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa3 OK + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEa3 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a3, 'ERROR') + + # Change PAGEb2 and FULLb to OK + self.change_backup_status(backup_dir, 'node', page_id_b2, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb3 OK + # PAGEa3 ERROR + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEa3, PAGEa2 and PAGEb1 to OK + self.change_backup_status(backup_dir, 'node', page_id_a3, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # PAGEb3 OK + # PAGEa3 OK + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Check that page_id_a3 and page_id_a2 are both direct descendants of page_id_a1 + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id=page_id_a3)['parent-backup-id'], + page_id_a1) + + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id=page_id_a2)['parent-backup-id'], + page_id_a1) + + # Delete FULLa + self.delete_pb(backup_dir, 'node', backup_id_a) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_multiple_descendants_dry_run(self): + """ + PAGEa3 + PAGEa2 / + \ / + PAGEa1 (delete target) + | + FULLa + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + 
self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUP + node.pgbench_init(scale=1) + backup_id_a = self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + + # Change PAGEa2 to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + page_id_a3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGEa2 to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + + # Delete PAGEa1 + output = self.delete_pb( + backup_dir, 'node', page_id_a1, + options=['--dry-run', '--log-level-console=LOG', '--delete-wal']) + + print(output) + self.assertIn( + 'LOG: Backup {0} can be deleted'.format(page_id_a3), + output) + self.assertIn( + 'LOG: Backup {0} can be deleted'.format(page_id_a2), + output) + self.assertIn( + 'LOG: Backup {0} can be deleted'.format(page_id_a1), + output) + + self.assertIn( + 'INFO: Resident data size to free by ' + 'delete of backup {0} :'.format(page_id_a1), + output) + + self.assertIn( + 'On timeline 1 WAL segments between 000000010000000000000001 ' + 'and 000000010000000000000003 can be removed', + output) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + output = self.delete_pb( + backup_dir, 'node', page_id_a1, + options=['--log-level-console=LOG', '--delete-wal']) + + self.assertIn( + 'LOG: Backup {0} will be deleted'.format(page_id_a3), + output) + self.assertIn( + 'LOG: Backup {0} will be deleted'.format(page_id_a2), + output) + self.assertIn( + 'LOG: Backup {0} will be deleted'.format(page_id_a1), + output) + self.assertIn( + 'INFO: Resident data size to free by ' + 'delete of backup {0} :'.format(page_id_a1), + output) + + self.assertIn( + 'On timeline 1 WAL segments between 000000010000000000000001 ' + 'and 000000010000000000000003 will be removed', + output) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 1) + + self.validate_pb(backup_dir, 'node') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_delete_error_backups(self): + """delete increment and all after him""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # full backup mode + self.backup_node(backup_dir, 'node', node) + # page backup mode + self.backup_node(backup_dir, 'node', node, backup_type="page") + + # Take FULL BACKUP + backup_id_a = self.backup_node(backup_dir, 'node', node) + # Take PAGE BACKUP + backup_id_b = self.backup_node(backup_dir, 'node', node, backup_type="page") + + backup_id_c = self.backup_node(backup_dir, 'node', node, backup_type="page") + + backup_id_d = self.backup_node(backup_dir, 'node', node, backup_type="page") + + # full backup mode + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type="page") + backup_id_e = 
self.backup_node(backup_dir, 'node', node, backup_type="page") + self.backup_node(backup_dir, 'node', node, backup_type="page") + + # Change status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_c, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_e, 'ERROR') + + print(self.show_pb(backup_dir, as_text=True, as_json=False)) + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 10) + + # delete error backups + output = self.delete_pb(backup_dir, 'node', options=['--status=ERROR', '--dry-run']) + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 10) + + self.assertIn( + "Deleting all backups with status 'ERROR' in dry run mode", + output) + + self.assertIn( + "INFO: Backup {0} with status OK can be deleted".format(backup_id_d), + output) + + print(self.show_pb(backup_dir, as_text=True, as_json=False)) + + show_backups = self.show_pb(backup_dir, 'node') + output = self.delete_pb(backup_dir, 'node', options=['--status=ERROR']) + print(output) + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 4) + + self.assertEqual(show_backups[0]['status'], "OK") + self.assertEqual(show_backups[1]['status'], "OK") + self.assertEqual(show_backups[2]['status'], "OK") + self.assertEqual(show_backups[3]['status'], "OK") + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/delete_test.py b/tests/delete_test.py deleted file mode 100644 index 4afb15ae0..000000000 --- a/tests/delete_test.py +++ /dev/null @@ -1,203 +0,0 @@ -import unittest -import os -from .helpers.ptrack_helpers import ProbackupTest, ProbackupException -import subprocess -from sys import exit - - -module_name = 'delete' - - -class DeleteTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_delete_full_backups(self): - """delete full backups""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - # full backup - self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - self.backup_node(backup_dir, 'node', node) - - show_backups = self.show_pb(backup_dir, 'node') - id_1 = show_backups[0]['id'] - id_2 = show_backups[1]['id'] - id_3 = show_backups[2]['id'] - self.delete_pb(backup_dir, 'node', id_2) - show_backups = self.show_pb(backup_dir, 'node') - self.assertEqual(show_backups[0]['id'], id_1) - self.assertEqual(show_backups[1]['id'], id_3) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_delete_increment_page(self): - """delete increment and all after him""" - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = 
os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - # full backup mode - self.backup_node(backup_dir, 'node', node) - # page backup mode - self.backup_node(backup_dir, 'node', node, backup_type="page") - # page backup mode - self.backup_node(backup_dir, 'node', node, backup_type="page") - # full backup mode - self.backup_node(backup_dir, 'node', node) - - show_backups = self.show_pb(backup_dir, 'node') - self.assertEqual(len(show_backups), 4) - - # delete first page backup - self.delete_pb(backup_dir, 'node', show_backups[1]['id']) - - show_backups = self.show_pb(backup_dir, 'node') - self.assertEqual(len(show_backups), 2) - - self.assertEqual(show_backups[0]['backup-mode'], "FULL") - self.assertEqual(show_backups[0]['status'], "OK") - self.assertEqual(show_backups[1]['backup-mode'], "FULL") - self.assertEqual(show_backups[1]['status'], "OK") - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_delete_increment_ptrack(self): - """delete increment and all after him""" - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'ptrack_enable': 'on'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - # full backup mode - self.backup_node(backup_dir, 'node', node) - # page backup mode - self.backup_node(backup_dir, 'node', node, backup_type="ptrack") - # page backup mode - self.backup_node(backup_dir, 'node', node, backup_type="ptrack") - # full backup mode - self.backup_node(backup_dir, 'node', node) - - show_backups = self.show_pb(backup_dir, 'node') - self.assertEqual(len(show_backups), 4) - - # delete first page backup - self.delete_pb(backup_dir, 'node', show_backups[1]['id']) - - show_backups = self.show_pb(backup_dir, 'node') - self.assertEqual(len(show_backups), 2) - - self.assertEqual(show_backups[0]['backup-mode'], "FULL") - self.assertEqual(show_backups[0]['status'], "OK") - self.assertEqual(show_backups[1]['backup-mode'], "FULL") - self.assertEqual(show_backups[1]['status'], "OK") - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_delete_orphaned_wal_segments(self): - """make archive node, make three full backups, delete second backup without --wal option, then delete orphaned wals via --wal option""" - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.safe_psql( - "postgres", - "create table t_heap as select 1 as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - # first full backup - backup_1_id = self.backup_node(backup_dir, 'node', node) - # second full backup - backup_2_id = self.backup_node(backup_dir, 'node', node) - # third full backup - backup_3_id = self.backup_node(backup_dir, 'node', node) - node.stop() - - # Check 
wals - wals_dir = os.path.join(backup_dir, 'wal', 'node') - wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] - original_wal_quantity = len(wals) - - # delete second full backup - self.delete_pb(backup_dir, 'node', backup_2_id) - # check wal quantity - self.validate_pb(backup_dir) - self.assertEqual(self.show_pb(backup_dir, 'node', backup_1_id)['status'], "OK") - self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") - # try to delete wals for second backup - self.delete_pb(backup_dir, 'node', options=['--wal']) - # check wal quantity - self.validate_pb(backup_dir) - self.assertEqual(self.show_pb(backup_dir, 'node', backup_1_id)['status'], "OK") - self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") - - # delete first full backup - self.delete_pb(backup_dir, 'node', backup_1_id) - self.validate_pb(backup_dir) - self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") - - result = self.delete_pb(backup_dir, 'node', options=['--wal']) - # delete useless wals - self.assertTrue('INFO: removed min WAL segment' in result - and 'INFO: removed max WAL segment' in result) - self.validate_pb(backup_dir) - self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") - - # Check quantity, it should be lower than original - wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] - self.assertTrue(original_wal_quantity > len(wals), "Number of wals not changed after 'delete --wal' which is illegal") - - # Delete last backup - self.delete_pb(backup_dir, 'node', backup_3_id, options=['--wal']) - wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] - self.assertEqual (0, len(wals), "Number of wals should be equal to 0") - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/delta.py b/tests/delta.py index 40450016b..0abdd1c2c 100644 --- a/tests/delta.py +++ b/tests/delta.py @@ -5,6 +5,7 @@ from testgres import QueryException import subprocess import time +from threading import Thread module_name = 'delta' @@ -13,7 +14,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): # @unittest.skip("skip") - def test_delta_vacuum_truncate_1(self): + def test_basic_delta_vacuum_truncate(self): """ make node, create table, take full backup, delete last 3 pages, vacuum relation, @@ -23,25 +24,20 @@ def test_delta_vacuum_truncate_1(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', - 'autovacuum': 'off' - } - ) + 'autovacuum': 'off'}) + node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname), - ) + base_dir=os.path.join(module_name, fname, 'node_restored')) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) node_restored.cleanup() - node.start() + node.slow_start() node.safe_psql( "postgres", @@ -49,59 +45,45 @@ def test_delta_vacuum_truncate_1(self): "create table t_heap as select i as id, " "md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as 
tsvector " - "from generate_series(0,1024) i;" - ) + "from generate_series(0,1024) i;") node.safe_psql( "postgres", - "vacuum t_heap" - ) + "vacuum t_heap") self.backup_node(backup_dir, 'node', node, options=['--stream']) node.safe_psql( "postgres", - "delete from t_heap where ctid >= '(11,0)'" - ) + "delete from t_heap where ctid >= '(11,0)'") node.safe_psql( "postgres", - "vacuum t_heap" - ) + "vacuum t_heap") self.backup_node( - backup_dir, 'node', node, backup_type='delta' - ) + backup_dir, 'node', node, backup_type='delta') self.backup_node( - backup_dir, 'node', node, backup_type='delta' - ) + backup_dir, 'node', node, backup_type='delta') pgdata = self.pgdata_content(node.data_dir) self.restore_node( - backup_dir, - 'node', - node_restored, - options=[ - "-j", "1", - "--log-level-file=verbose" - ] - ) + backup_dir, 'node', node_restored) # Physical comparison pgdata_restored = self.pgdata_content(node_restored.data_dir) self.compare_pgdata(pgdata, pgdata_restored) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() # Clean after yourself - self.del_test_dir(module_name, fname) + self.del_test_dir(module_name, fname, [node, node_restored]) # @unittest.skip("skip") - def test_delta_vacuum_truncate_2(self): + def test_delta_vacuum_truncate_1(self): """ make node, create table, take full backup, delete last 3 pages, vacuum relation, @@ -111,25 +93,22 @@ def test_delta_vacuum_truncate_2(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', 'autovacuum': 'off' } ) node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node_restored'), ) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) node_restored.cleanup() - node.start() + node.slow_start() self.create_tblspace_in_node(node, 'somedata') node.safe_psql( @@ -176,8 +155,6 @@ def test_delta_vacuum_truncate_2(self): 'node', node_restored, options=[ - "-j", "1", - "--log-level-file=verbose", "-T", "{0}={1}".format( old_tablespace, new_tablespace)] ) @@ -186,15 +163,14 @@ def test_delta_vacuum_truncate_2(self): pgdata_restored = self.pgdata_content(node_restored.data_dir) self.compare_pgdata(pgdata, pgdata_restored) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() # Clean after yourself self.del_test_dir(module_name, fname) # @unittest.skip("skip") - def test_delta_vacuum_truncate_3(self): + def test_delta_vacuum_truncate_2(self): """ make node, create table, take full backup, delete last 3 pages, vacuum relation, @@ -204,25 +180,22 @@ def test_delta_vacuum_truncate_3(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), 
set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', 'autovacuum': 'off' } ) node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node_restored'), ) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) node_restored.cleanup() - node.start() + node.slow_start() node.safe_psql( "postgres", @@ -241,32 +214,22 @@ def test_delta_vacuum_truncate_3(self): os.unlink(os.path.join(node.data_dir, filepath + '.1')) self.backup_node( - backup_dir, 'node', node, backup_type='delta' - ) + backup_dir, 'node', node, backup_type='delta') self.backup_node( - backup_dir, 'node', node, backup_type='delta' - ) + backup_dir, 'node', node, backup_type='delta') pgdata = self.pgdata_content(node.data_dir) self.restore_node( - backup_dir, - 'node', - node_restored, - options=[ - "-j", "1", - "--log-level-file=verbose" - ] - ) + backup_dir, 'node', node_restored) # Physical comparison pgdata_restored = self.pgdata_content(node_restored.data_dir) self.compare_pgdata(pgdata, pgdata_restored) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() # Clean after yourself self.del_test_dir(module_name, fname) @@ -280,12 +243,10 @@ def test_delta_stream(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s' } ) @@ -293,7 +254,7 @@ def test_delta_stream(self): self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -332,7 +293,7 @@ def test_delta_stream(self): "--recovery-target-action=promote"]), '\n Unexpected Error Message: {0}\n' ' CMD: {1}'.format(repr(self.output), self.cmd)) - node.start() + node.slow_start() full_result_new = node.execute("postgres", "SELECT * FROM t_heap") self.assertEqual(full_result, full_result_new) node.cleanup() @@ -348,7 +309,7 @@ def test_delta_stream(self): "--recovery-target-action=promote"]), '\n Unexpected Error Message: {0}\n' ' CMD: {1}'.format(repr(self.output), self.cmd)) - node.start() + node.slow_start() delta_result_new = node.execute("postgres", "SELECT * FROM t_heap") self.assertEqual(delta_result, delta_result_new) node.cleanup() @@ -366,20 +327,14 @@ def test_delta_archive(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s' - } - ) + initdb_params=['--data-checksums']) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - # self.set_archiving(backup_dir, 'node', node) - node.start() + self.set_archiving(backup_dir, 'node', node) + 
node.slow_start() # FULL BACKUP node.safe_psql( @@ -388,8 +343,7 @@ def test_delta_archive(self): "md5(i::text)::tsvector as tsvector from generate_series(0,1) i") full_result = node.execute("postgres", "SELECT * FROM t_heap") full_backup_id = self.backup_node( - backup_dir, 'node', node, - backup_type='full', options=['--stream']) + backup_dir, 'node', node, backup_type='full') # delta BACKUP node.safe_psql( @@ -398,8 +352,7 @@ def test_delta_archive(self): "md5(i::text)::tsvector as tsvector from generate_series(0,2) i") delta_result = node.execute("postgres", "SELECT * FROM t_heap") delta_backup_id = self.backup_node( - backup_dir, 'node', node, - backup_type='delta', options=['--stream']) + backup_dir, 'node', node, backup_type='delta') # Drop Node node.cleanup() @@ -415,7 +368,7 @@ def test_delta_archive(self): "--recovery-target-action=promote"]), '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(self.output), self.cmd)) - node.start() + node.slow_start() full_result_new = node.execute("postgres", "SELECT * FROM t_heap") self.assertEqual(full_result, full_result_new) node.cleanup() @@ -431,7 +384,7 @@ def test_delta_archive(self): "--recovery-target-action=promote"]), '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(self.output), self.cmd)) - node.start() + node.slow_start() delta_result_new = node.execute("postgres", "SELECT * FROM t_heap") self.assertEqual(delta_result, delta_result_new) node.cleanup() @@ -448,12 +401,10 @@ def test_delta_multiple_segments(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'fsync': 'off', 'shared_buffers': '1GB', 'maintenance_work_mem': '1GB', @@ -465,7 +416,7 @@ def test_delta_multiple_segments(self): self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) # self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() self.create_tblspace_in_node(node, 'somedata') @@ -492,22 +443,21 @@ def test_delta_multiple_segments(self): # RESTORE NODE restored_node = self.make_simple_node( - base_dir="{0}/{1}/restored_node".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'restored_node')) restored_node.cleanup() tblspc_path = self.get_tblspace_path(node, 'somedata') tblspc_path_new = self.get_tblspace_path( restored_node, 'somedata_restored') - self.restore_node(backup_dir, 'node', restored_node, options=[ - "-j", "4", "-T", "{0}={1}".format(tblspc_path, tblspc_path_new), - "--recovery-target-action=promote"]) + self.restore_node( + backup_dir, 'node', restored_node, options=[ + "-j", "4", "-T", "{0}={1}".format(tblspc_path, tblspc_path_new)]) # GET PHYSICAL CONTENT FROM NODE_RESTORED pgdata_restored = self.pgdata_content(restored_node.data_dir) # START RESTORED NODE - restored_node.append_conf( - "postgresql.auto.conf", "port = {0}".format(restored_node.port)) + self.set_auto_conf(restored_node, {'port': restored_node.port}) restored_node.slow_start() result_new = restored_node.safe_psql( @@ -531,23 +481,17 @@ def test_delta_vacuum_full(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 
'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s' - } - ) + initdb_params=['--data-checksums']) + node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname), - ) + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node_restored.cleanup() - node.start() + node.slow_start() self.create_tblspace_in_node(node, 'somedata') self.backup_node(backup_dir, 'node', node, options=['--stream']) @@ -558,62 +502,49 @@ def test_delta_vacuum_full(self): " as id from generate_series(0,1000000) i" ) - # create async connection - conn = self.get_async_connect(port=node.port) - - self.wait(conn) - - acurs = conn.cursor() - acurs.execute("select pg_backend_pid()") + pg_connect = node.connect("postgres", autocommit=True) - self.wait(conn) - pid = acurs.fetchall()[0][0] - print(pid) - - gdb = self.gdb_attach(pid) + gdb = self.gdb_attach(pg_connect.pid) gdb.set_breakpoint('reform_and_rewrite_tuple') - if not gdb.continue_execution_until_running(): - print('Failed gdb continue') - exit(1) + gdb.continue_execution_until_running() - acurs.execute("VACUUM FULL t_heap") + process = Thread( + target=pg_connect.execute, args=["VACUUM FULL t_heap"]) + process.start() - if gdb.stopped_in_breakpoint(): - if gdb.continue_execution_until_break(20) != 'breakpoint-hit': - print('Failed to hit breakpoint') - exit(1) + while not gdb.stopped_in_breakpoint: + sleep(1) - self.backup_node( - backup_dir, 'node', node, - backup_type='delta', options=['--stream'] - ) + gdb.continue_execution_until_break(20) self.backup_node( backup_dir, 'node', node, - backup_type='delta', options=['--stream'] - ) + backup_type='delta', options=['--stream']) + if self.paranoia: pgdata = self.pgdata_content(node.data_dir) + gdb.remove_all_breakpoints() + gdb._execute('detach') + process.join() + old_tablespace = self.get_tblspace_path(node, 'somedata') new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new') self.restore_node( backup_dir, 'node', node_restored, options=["-j", "4", "-T", "{0}={1}".format( - old_tablespace, new_tablespace)] - ) + old_tablespace, new_tablespace)]) # Physical comparison if self.paranoia: pgdata_restored = self.pgdata_content(node_restored.data_dir) self.compare_pgdata(pgdata, pgdata_restored) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) + self.set_auto_conf(node_restored, {'port': node_restored.port}) - node_restored.start() + node_restored.slow_start() # Clean after yourself self.del_test_dir(module_name, fname) @@ -627,21 +558,18 @@ def test_create_db(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', 'max_wal_size': '10GB', - 'max_wal_senders': '2', - 'checkpoint_timeout': '5min', 'autovacuum': 'off' } ) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -673,7 +601,7 @@ def test_create_db(self): # RESTORE node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname) + 
base_dir=os.path.join(module_name, fname, 'node_restored') ) node_restored.cleanup() @@ -683,7 +611,7 @@ def test_create_db(self): node_restored, backup_id=backup_id, options=[ - "-j", "4", "--log-level-file=verbose", + "-j", "4", "--immediate", "--recovery-target-action=promote"]) @@ -693,9 +621,8 @@ def test_create_db(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() # DROP DATABASE DB1 node.safe_psql( @@ -717,7 +644,7 @@ def test_create_db(self): node_restored, backup_id=backup_id, options=[ - "-j", "4", "--log-level-file=verbose", + "-j", "4", "--immediate", "--recovery-target-action=promote"] ) @@ -728,9 +655,8 @@ def test_create_db(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() try: node_restored.safe_psql('db1', 'select 1') @@ -739,14 +665,12 @@ def test_create_db(self): 1, 0, "Expecting Error because we are connecting to deleted database" "\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd) - ) + repr(self.output), self.cmd)) except QueryException as e: self.assertTrue( 'FATAL: database "db1" does not exist' in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd) - ) + repr(e.message), self.cmd)) # Clean after yourself self.del_test_dir(module_name, fname) @@ -760,13 +684,11 @@ def test_exists_in_previous_backup(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', 'max_wal_size': '10GB', - 'max_wal_senders': '2', 'checkpoint_timeout': '5min', 'autovacuum': 'off' } @@ -775,7 +697,7 @@ def test_exists_in_previous_backup(self): self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -815,7 +737,7 @@ def test_exists_in_previous_backup(self): backup_id = self.backup_node( backup_dir, 'node', node, backup_type='delta', - options=["--stream", "--log-level-file=verbose"] + options=["--stream"] ) # if self.paranoia: # pgdata_delta = self.pgdata_content( @@ -834,7 +756,7 @@ def test_exists_in_previous_backup(self): # RESTORE node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname) + base_dir=os.path.join(module_name, fname, 'node_restored') ) node_restored.cleanup() @@ -844,7 +766,7 @@ def test_exists_in_previous_backup(self): node_restored, backup_id=backup_id, options=[ - "-j", "4", "--log-level-file=verbose", + "-j", "4", "--immediate", "--recovery-target-action=promote"]) @@ -854,9 +776,8 @@ def test_exists_in_previous_backup(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() 
# Clean after yourself self.del_test_dir(module_name, fname) @@ -870,11 +791,9 @@ def test_alter_table_set_tablespace_delta(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', 'autovacuum': 'off' } @@ -882,7 +801,7 @@ def test_alter_table_set_tablespace_delta(self): self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP self.create_tblspace_in_node(node, 'somedata') @@ -890,8 +809,8 @@ def test_alter_table_set_tablespace_delta(self): "postgres", "create table t_heap tablespace somedata as select i as id," " md5(i::text) as text, md5(i::text)::tsvector as tsvector" - " from generate_series(0,100) i" - ) + " from generate_series(0,100) i") + # FULL backup self.backup_node(backup_dir, 'node', node, options=["--stream"]) @@ -899,8 +818,7 @@ def test_alter_table_set_tablespace_delta(self): self.create_tblspace_in_node(node, 'somedata_new') node.safe_psql( "postgres", - "alter table t_heap set tablespace somedata_new" - ) + "alter table t_heap set tablespace somedata_new") # DELTA BACKUP result = node.safe_psql( @@ -908,15 +826,14 @@ def test_alter_table_set_tablespace_delta(self): self.backup_node( backup_dir, 'node', node, backup_type='delta', - options=["--stream"] - ) + options=["--stream"]) + if self.paranoia: pgdata = self.pgdata_content(node.data_dir) # RESTORE node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname) - ) + base_dir=os.path.join(module_name, fname, 'node_restored')) node_restored.cleanup() self.restore_node( @@ -930,8 +847,7 @@ def test_alter_table_set_tablespace_delta(self): "-T", "{0}={1}".format( self.get_tblspace_path(node, 'somedata_new'), self.get_tblspace_path(node_restored, 'somedata_new') - ), - "--recovery-target-action=promote" + ) ] ) @@ -941,8 +857,7 @@ def test_alter_table_set_tablespace_delta(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node_restored.port)) + self.set_auto_conf(node_restored, {'port': node_restored.port}) node_restored.slow_start() result_new = node_restored.safe_psql( @@ -963,19 +878,17 @@ def test_alter_database_set_tablespace_delta(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, initdb_params=['--data-checksums'], + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s', 'autovacuum': 'off' } ) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() self.create_tblspace_in_node(node, 'somedata') # FULL backup @@ -1016,7 +929,7 @@ def test_alter_database_set_tablespace_delta(self): # RESTORE node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname) + base_dir=os.path.join(module_name, fname, 'node_restored') ) node_restored.cleanup() @@ -1041,9 +954,8 @@ def 
test_alter_database_set_tablespace_delta(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() # Clean after yourself self.del_test_dir(module_name, fname) @@ -1057,11 +969,9 @@ def test_delta_delete(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', 'autovacuum': 'off' } @@ -1070,7 +980,7 @@ def test_delta_delete(self): self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() self.create_tblspace_in_node(node, 'somedata') @@ -1106,7 +1016,7 @@ def test_delta_delete(self): # RESTORE node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname) + base_dir=os.path.join(module_name, fname, 'node_restored') ) node_restored.cleanup() @@ -1127,139 +1037,240 @@ def test_delta_delete(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() # Clean after yourself self.del_test_dir(module_name, fname) - # @unittest.skip("skip") - def test_page_corruption_heal_via_ptrack_1(self): - """make node, corrupt some page, check that backup failed""" + def test_delta_nullified_heap_page_backup(self): + """ + make node, take full backup, nullify some heap block, + take delta backup, restore, physically compare pgdata`s + """ fname = self.id().split('.')[3] node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + self.set_archiving(backup_dir, 'node', node) + node.slow_start() - self.backup_node( - backup_dir, 'node', node, - backup_type="full", options=["-j", "4", "--stream"]) + node.pgbench_init(scale=1) - node.safe_psql( + file_path = node.safe_psql( "postgres", - "create table t_heap as select 1 as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1000) i") + "select pg_relation_filepath('pgbench_accounts')").rstrip() + node.safe_psql( "postgres", - "CHECKPOINT;") + "CHECKPOINT") - heap_path = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() + self.backup_node( + backup_dir, 'node', node) - with open(os.path.join(node.data_dir, heap_path), "rb+", 0) as f: - f.seek(9000) - f.write(b"bla") - f.flush() - f.close + # Nullify some block in PostgreSQL + file = os.path.join(node.data_dir, file_path).replace("\\", "/") + if os.name 
== 'nt': + file = file.replace("\\", "/") - self.backup_node( - backup_dir, 'node', node, backup_type="delta", - options=["-j", "4", "--stream", "--log-level-file=verbose"]) - - # open log file and check - with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: - log_content = f.read() - self.assertIn('block 1, try to fetch via SQL', log_content) - self.assertIn('SELECT pg_catalog.pg_ptrack_get_block', log_content) + with open(file, 'r+b', 0) as f: + f.seek(8192) + f.write(b"\x00"*8192) + f.flush() f.close - self.assertTrue( - self.show_pb(backup_dir, 'node')[1]['status'] == 'OK', - "Backup Status should be OK") + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=["--log-level-file=verbose"]) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + if not self.remote: + log_file_path = os.path.join(backup_dir, "log", "pg_probackup.log") + with open(log_file_path) as f: + content = f.read() + + self.assertIn( + 'VERBOSE: File: "{0}" blknum 1, empty page'.format(file), + content) + self.assertNotIn( + "Skipping blknum 1 in file: {0}".format(file), + content) + + # Restore DELTA backup + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) # Clean after yourself self.del_test_dir(module_name, fname) - # @unittest.skip("skip") - def test_page_corruption_heal_via_ptrack_2(self): - """make node, corrupt some page, check that backup failed""" + def test_delta_backup_from_past(self): + """ + make node, take FULL stream backup, take DELTA stream backup, + restore FULL backup, try to take second DELTA stream backup + """ fname = self.id().split('.')[3] node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + initdb_params=['--data-checksums']) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() - self.backup_node( - backup_dir, 'node', node, backup_type="full", - options=["-j", "4", "--stream"]) + backup_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) - node.safe_psql( - "postgres", - "create table t_heap as select 1 as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1000) i") - node.safe_psql( - "postgres", - "CHECKPOINT;") + node.pgbench_init(scale=3) - heap_path = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() - node.stop() + # First DELTA + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--stream']) - with open(os.path.join(node.data_dir, heap_path), "rb+", 0) as f: - f.seek(9000) - f.write(b"bla") - f.flush() - f.close - node.start() + # Restore FULL backup + node.cleanup() + self.restore_node(backup_dir, 'node', node, backup_id=backup_id) + node.slow_start() + # Second DELTA backup try: self.backup_node( backup_dir, 'node', node, - backup_type="delta", options=["-j", "4", "--stream"]) + backup_type='delta', options=['--stream']) # we 
should die here because exception is what we expect to happen self.assertEqual( 1, 0, - "Expecting Error because of page " - "corruption in PostgreSQL instance.\n" - " Output: {0} \n CMD: {1}".format( + "Expecting Error because we are backing up an instance from the past" + "\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: self.assertTrue( - "WARNING: File" in e.message and - "blknum" in e.message and - "have wrong checksum" in e.message and - "try to fetch via SQL" in e.message and - "WARNING: page verification failed, " - "calculated checksum" in e.message and - "ERROR: query failed: " - "ERROR: invalid page in block" in e.message and - "query was: SELECT pg_catalog.pg_ptrack_get_block" in e.message, - "\n Unexpected Error Message: {0}\n CMD: {1}".format( + 'ERROR: Current START LSN ' in e.message and + 'is lower than START LSN ' in e.message and + 'of previous backup ' in e.message and + 'It may indicate that we are trying ' + 'to backup PostgreSQL instance from the past' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(e.message), self.cmd)) - self.assertTrue( - self.show_pb(backup_dir, 'node')[1]['status'] == 'ERROR', - "Backup Status should be ERROR") + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + # @unittest.expectedFailure + def test_delta_pg_resetxlog(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'shared_buffers': '512MB', + 'max_wal_size': '3GB'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # Create table + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap " + "as select nextval('t_seq')::int as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " +# "from generate_series(0,25600) i") + "from generate_series(0,2560) i") + + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + node.safe_psql( + 'postgres', + "update t_heap set id = nextval('t_seq'), text = md5(text), " + "tsvector = md5(repeat(tsvector::text, 10))::tsvector") + + # kill the bastard + if self.verbose: + print('Killing postmaster. Losing Ptrack changes') + node.stop(['-m', 'immediate', '-D', node.data_dir]) + + # now smack it with sledgehammer + if node.major_version >= 10: + pg_resetxlog_path = self.get_bin_path('pg_resetwal') + wal_dir = 'pg_wal' + else: + pg_resetxlog_path = self.get_bin_path('pg_resetxlog') + wal_dir = 'pg_xlog' + + self.run_binary( + [ + pg_resetxlog_path, + '-D', + node.data_dir, + '-o 42', + '-f' + ], + asynchronous=False) + + if not node.status(): + node.slow_start() + else: + print("Die! Die! Why won't you die?... 
Why won't you die?") + exit(1) + + # take ptrack backup +# self.backup_node( +# backup_dir, 'node', node, +# backup_type='delta', options=['--stream']) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--stream']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because instance was brutalized by pg_resetxlog" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd) + ) + except ProbackupException as e: + self.assertIn( + 'Insert error message', + e.message, + '\n Unexpected Error Message: {0}\n' + ' CMD: {1}'.format(repr(e.message), self.cmd)) + +# pgdata = self.pgdata_content(node.data_dir) +# +# node_restored = self.make_simple_node( +# base_dir=os.path.join(module_name, fname, 'node_restored')) +# node_restored.cleanup() +# +# self.restore_node( +# backup_dir, 'node', node_restored) +# +# pgdata_restored = self.pgdata_content(node_restored.data_dir) +# self.compare_pgdata(pgdata, pgdata_restored) # Clean after yourself self.del_test_dir(module_name, fname) diff --git a/tests/exclude.py b/tests/exclude.py index 48b7889c7..c9efe22af 100644 --- a/tests/exclude.py +++ b/tests/exclude.py @@ -18,19 +18,14 @@ def test_exclude_temp_tables(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', 'max_wal_senders': '2', - 'shared_buffers': '1GB', 'fsync': 'off', 'ptrack_enable': 'on'} - ) + initdb_params=['--data-checksums']) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() - conn = node.connect() with node.connect("postgres") as conn: conn.execute( @@ -103,26 +98,22 @@ def test_exclude_temp_tables(self): def test_exclude_unlogged_tables_1(self): """ make node without archiving, create unlogged table, take full backup, - alter table to unlogged, take ptrack backup, restore ptrack backup, + alter table to unlogged, take delta backup, restore delta backup, check that PGDATA`s are physically the same """ fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - "shared_buffers": "10MB", - "fsync": "off", - 'ptrack_enable': 'on'} - ) + 'autovacuum': 'off', + "shared_buffers": "10MB"}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() conn = node.connect() with node.connect("postgres") as conn: @@ -142,15 +133,15 @@ def test_exclude_unlogged_tables_1(self): node.safe_psql('postgres', "alter table test set logged") self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + backup_dir, 'node', node, backup_type='delta', + options=['--stream'] ) pgdata = self.pgdata_content(node.data_dir) node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname), - ) + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() self.restore_node( @@ -162,3 +153,85 @@ def 
test_exclude_unlogged_tables_1(self): # Clean after yourself self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_exclude_log_dir(self): + """ + check that by default 'log' and 'pg_log' directories are not backed up + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'logging_collector': 'on', + 'log_filename': 'postgresql.log'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.backup_node( + backup_dir, 'node', node, + backup_type='full', options=['--stream']) + + log_dir = node.safe_psql( + 'postgres', + 'show log_directory').rstrip() + + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]) + + # check that PGDATA/log or PGDATA/pg_log do not exists + path = os.path.join(node.data_dir, log_dir) + log_file = os.path.join(path, 'postgresql.log') + self.assertTrue(os.path.exists(path)) + self.assertFalse(os.path.exists(log_file)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_exclude_log_dir_1(self): + """ + check that "--backup-pg-log" works correctly + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'logging_collector': 'on', + 'log_filename': 'postgresql.log'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + log_dir = node.safe_psql( + 'postgres', + 'show log_directory').rstrip() + + self.backup_node( + backup_dir, 'node', node, + backup_type='full', options=['--stream', '--backup-pg-log']) + + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]) + + # check that PGDATA/log or PGDATA/pg_log do not exists + path = os.path.join(node.data_dir, log_dir) + log_file = os.path.join(path, 'postgresql.log') + self.assertTrue(os.path.exists(path)) + self.assertTrue(os.path.exists(log_file)) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/expected/option_help.out b/tests/expected/option_help.out index 35f584062..2170e2773 100644 --- a/tests/expected/option_help.out +++ b/tests/expected/option_help.out @@ -7,7 +7,9 @@ pg_probackup - utility to manage backup/recovery of PostgreSQL database. pg_probackup init -B backup-path - pg_probackup set-config -B backup-dir --instance=instance_name + pg_probackup set-config -B backup-path --instance=instance_name + [-D pgdata-path] + [--external-dirs=external-directories-paths] [--log-level-console=log-level-console] [--log-level-file=log-level-file] [--log-filename=log-filename] @@ -17,20 +19,34 @@ pg_probackup - utility to manage backup/recovery of PostgreSQL database. 
[--log-rotation-age=log-rotation-age] [--retention-redundancy=retention-redundancy] [--retention-window=retention-window] + [--wal-depth=wal-depth] [--compress-algorithm=compress-algorithm] [--compress-level=compress-level] + [--archive-timeout=timeout] [-d dbname] [-h host] [-p port] [-U username] - [--master-db=db_name] [--master-host=host_name] - [--master-port=port] [--master-user=user_name] - [--replica-timeout=timeout] - - pg_probackup show-config -B backup-dir --instance=instance_name + [--remote-proto] [--remote-host] + [--remote-port] [--remote-path] [--remote-user] + [--ssh-options] + [--restore-command=cmdline] [--archive-host=destination] + [--archive-port=port] [--archive-user=username] + [--help] + + pg_probackup set-backup -B backup-path --instance=instance_name + -i backup-id [--ttl=interval] [--expire-time=timestamp] + [--note=text] + [--help] + + pg_probackup show-config -B backup-path --instance=instance_name [--format=format] + [--help] pg_probackup backup -B backup-path -b backup-mode --instance=instance_name - [-C] [--stream [-S slot-name]] [--backup-pg-log] - [-j num-threads] [--archive-timeout=archive-timeout] - [--progress] + [-D pgdata-path] [-C] + [--stream [-S slot-name]] [--temp-slot] + [--backup-pg-log] [-j num-threads] [--progress] + [--no-validate] [--skip-block-validation] + [--external-dirs=external-directories-paths] + [--no-sync] [--log-level-console=log-level-console] [--log-level-file=log-level-file] [--log-filename=log-filename] @@ -38,58 +54,115 @@ pg_probackup - utility to manage backup/recovery of PostgreSQL database. [--log-directory=log-directory] [--log-rotation-size=log-rotation-size] [--log-rotation-age=log-rotation-age] - [--delete-expired] [--delete-wal] + [--delete-expired] [--delete-wal] [--merge-expired] [--retention-redundancy=retention-redundancy] [--retention-window=retention-window] + [--wal-depth=wal-depth] [--compress] [--compress-algorithm=compress-algorithm] [--compress-level=compress-level] + [--archive-timeout=archive-timeout] [-d dbname] [-h host] [-p port] [-U username] [-w --no-password] [-W --password] - [--master-db=db_name] [--master-host=host_name] - [--master-port=port] [--master-user=user_name] - [--replica-timeout=timeout] - - pg_probackup restore -B backup-dir --instance=instance_name - [-D pgdata-dir] [-i backup-id] [--progress] - [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]] - [--timeline=timeline] [-T OLDDIR=NEWDIR] - [--immediate] [--recovery-target-name=target-name] + [--remote-proto] [--remote-host] + [--remote-port] [--remote-path] [--remote-user] + [--ssh-options] + [--ttl=interval] [--expire-time=timestamp] [--note=text] + [--help] + + pg_probackup restore -B backup-path --instance=instance_name + [-D pgdata-path] [-i backup-id] [-j num-threads] + [--recovery-target-time=time|--recovery-target-xid=xid + |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]] + [--recovery-target-timeline=timeline] + [--recovery-target=immediate|latest] + [--recovery-target-name=target-name] [--recovery-target-action=pause|promote|shutdown] - [--restore-as-replica] - [--no-validate] - - pg_probackup validate -B backup-dir [--instance=instance_name] - [-i backup-id] [--progress] - [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]] + [--restore-command=cmdline] + [-R | --restore-as-replica] [--force] + [--primary-conninfo=primary_conninfo] + [-S | --primary-slot-name=slotname] + [--no-validate] [--skip-block-validation] + [-T OLDDIR=NEWDIR] [--progress] + [--external-mapping=OLDDIR=NEWDIR] + 
[--skip-external-dirs] [--no-sync] + [-I | --incremental-mode=none|checksum|lsn] + [--db-include | --db-exclude] + [--remote-proto] [--remote-host] + [--remote-port] [--remote-path] [--remote-user] + [--ssh-options] + [--archive-host=hostname] + [--archive-port=port] [--archive-user=username] + [--help] + + pg_probackup validate -B backup-path [--instance=instance_name] + [-i backup-id] [--progress] [-j num-threads] + [--recovery-target-time=time|--recovery-target-xid=xid + |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]] + [--recovery-target-timeline=timeline] [--recovery-target-name=target-name] - [--timeline=timeline] + [--skip-block-validation] + [--help] - pg_probackup show -B backup-dir + pg_probackup checkdb [-B backup-path] [--instance=instance_name] + [-D pgdata-path] [--progress] [-j num-threads] + [--amcheck] [--skip-block-validation] + [--heapallindexed] + [--help] + + pg_probackup show -B backup-path [--instance=instance_name [-i backup-id]] - [--format=format] + [--format=format] [--archive] + [--help] - pg_probackup delete -B backup-dir --instance=instance_name - [--wal] [-i backup-id | --expired] + pg_probackup delete -B backup-path --instance=instance_name + [-j num-threads] [--progress] + [--retention-redundancy=retention-redundancy] + [--retention-window=retention-window] + [--wal-depth=wal-depth] + [-i backup-id | --delete-expired | --merge-expired | --status=backup_status] + [--delete-wal] + [--dry-run] + [--help] - pg_probackup merge -B backup-dir --instance=instance_name - -i backup-id + pg_probackup merge -B backup-path --instance=instance_name + -i backup-id [--progress] [-j num-threads] + [--help] - pg_probackup add-instance -B backup-dir -D pgdata-dir + pg_probackup add-instance -B backup-path -D pgdata-path --instance=instance_name + [--external-dirs=external-directories-paths] + [--remote-proto] [--remote-host] + [--remote-port] [--remote-path] [--remote-user] + [--ssh-options] + [--help] - pg_probackup del-instance -B backup-dir + pg_probackup del-instance -B backup-path --instance=instance_name + [--help] - pg_probackup archive-push -B backup-dir --instance=instance_name - --wal-file-path=wal-file-path + pg_probackup archive-push -B backup-path --instance=instance_name --wal-file-name=wal-file-name - [--compress [--compress-level=compress-level]] - [--overwrite] + [-j num-threads] [--batch-size=batch_size] + [--archive-timeout=timeout] + [--no-ready-rename] [--no-sync] + [--overwrite] [--compress] + [--compress-algorithm=compress-algorithm] + [--compress-level=compress-level] + [--remote-proto] [--remote-host] + [--remote-port] [--remote-path] [--remote-user] + [--ssh-options] + [--help] - pg_probackup archive-get -B backup-dir --instance=instance_name + pg_probackup archive-get -B backup-path --instance=instance_name --wal-file-path=wal-file-path --wal-file-name=wal-file-name + [-j num-threads] [--batch-size=batch_size] + [--no-validate-wal] + [--remote-proto] [--remote-host] + [--remote-port] [--remote-path] [--remote-user] + [--ssh-options] + [--help] Read the website for details. Report bugs to . 
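For reference, the expanded `delete` syntax shown in this help output is what the new dry-run and status-based tests in tests/delete.py exercise. A minimal command-line sketch of those two combinations follows; the catalog path, instance name, and backup ID are placeholder values for illustration only, not taken from this patch:

    # Preview which backups (the given backup plus its descendants) and which
    # no-longer-needed WAL segments would be removed, without touching anything
    pg_probackup delete -B /backup/catalog --instance=node -i BACKUP_ID \
        --dry-run --delete-wal --log-level-console=LOG

    # Remove every backup whose status is ERROR (run with --dry-run first to preview)
    pg_probackup delete -B /backup/catalog --instance=node --status=ERROR

With `--dry-run` the backup catalog and WAL archive are left untouched, which is what the tests assert by re-counting backups with `show` after the call and only seeing the count drop once the same command is repeated without `--dry-run`.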
diff --git a/tests/expected/option_version.out b/tests/expected/option_version.out index 35e212c3e..47c19fef5 100644 --- a/tests/expected/option_version.out +++ b/tests/expected/option_version.out @@ -1 +1 @@ -pg_probackup 2.0.18 \ No newline at end of file +pg_probackup 2.4.2 \ No newline at end of file diff --git a/tests/external.py b/tests/external.py new file mode 100644 index 000000000..9d14d7558 --- /dev/null +++ b/tests/external.py @@ -0,0 +1,2549 @@ +import unittest +import os +from time import sleep +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from .helpers.cfs_helpers import find_by_name +import shutil + + +module_name = 'external' + +# TODO: add some ptrack tests +class ExternalTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_basic_external(self): + """ + make node, create external directory, take backup + with external directory, restore backup, check that + external directory was successfully copied + """ + fname = self.id().split('.')[3] + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + set_replication=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + external_dir = self.get_tblspace_path(node, 'somedirectory') + + # create directory in external_directory + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # take FULL backup with external directory pointing to a file + file_path = os.path.join(core_dir, 'file') + open(file_path, "w+") + + try: + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=[ + '--external-dirs={0}'.format(file_path)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because external dir point to a file" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: --external-dirs option' in e.message and + 'directory or symbolic link expected' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + # FULL backup + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + # Fill external directories + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir, options=["-j", "4"]) + + # Full backup with external dir + self.backup_node( + backup_dir, 'node', node, + options=[ + '--external-dirs={0}'.format(external_dir)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_external_none(self): + """ + make node, create external directory, take backup + with external directory, take delta backup with --external-dirs=none, + restore delta backup, check that + external directory was not copied + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + 
base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + set_replication=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + external_dir = self.get_tblspace_path(node, 'somedirectory') + + # create directory in external_directory + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + # Fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir, options=["-j", "4"]) + + # Full backup with external dir + self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--external-dirs={0}'.format(external_dir)]) + + # Delta backup without external directory + self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=['--external-dirs=none', '--stream']) + + shutil.rmtree(external_dir, ignore_errors=True) + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_external_dirs_overlapping(self): + """ + make node, create directory, + take backup with two external directories pointing to + the same directory, backup should fail + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + set_replication=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # create directory in external_directory + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + os.mkdir(external_dir1) + os.mkdir(external_dir2) + + # Full backup with external dirs + try: + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}{1}{0}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir1)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because tablespace mapping is incorrect" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: External directory path (-E option)' in e.message and + 'contain another external directory' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_external_dir_mapping(self): + """ + make node, take full backup, check that restore with + external-dir mapping will end with error, take page backup, + check that restore with external-dir mapping will end with + success + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir 
= os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # Fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + external_dir1_new = self.get_tblspace_path(node_restored, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node_restored, 'external_dir2') + + try: + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format( + external_dir1, external_dir1_new), + "--external-mapping={0}={1}".format( + external_dir2, external_dir2_new)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because tablespace mapping is incorrect" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: --external-mapping option' in e.message and + 'have an entry in list of external directories' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format( + external_dir1, external_dir1_new), + "--external-mapping={0}={1}".format( + external_dir2, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_backup_multiple_external(self): + """check that cmdline has priority over config""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # FULL backup + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.set_config( + backup_dir, 'node', + options=['-E', external_dir1]) + + # cmdline option MUST override options in config + self.backup_node( + backup_dir, 'node', node, 
backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", external_dir2]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs', 'external_dir1']) + + node.cleanup() + shutil.rmtree(external_dir1, ignore_errors=True) + shutil.rmtree(external_dir2, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + options=["-j", "4"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_backward_compatibility(self): + """ + take backup with old binary without external dirs support + take delta backup with new binary and 2 external directories + restore delta backup, check that incremental chain + restored correctly + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.show_pb(backup_dir) + + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.show_pb(backup_dir) + + node.slow_start() + + node.pgbench_init(scale=3) + + # FULL backup with old binary without external dirs support + self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + # fill external directories with changed data + shutil.rmtree(external_dir1, ignore_errors=True) + shutil.rmtree(external_dir2, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # delta backup with external directories using new binary + self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # RESTORE chain with new binary + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + external_dir1_new = self.get_tblspace_path(node_restored, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node_restored, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1, external_dir1_new), + "--external-mapping={0}={1}".format(external_dir2, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + + 
self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_backward_compatibility_merge_1(self): + """ + take backup with old binary without external dirs support + take delta backup with new binary and 2 external directories + merge delta backup ajd restore it + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.show_pb(backup_dir) + + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.show_pb(backup_dir) + + node.slow_start() + + node.pgbench_init(scale=3) + + # tmp FULL backup with old binary + tmp_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # FULL backup with old binary without external dirs support + self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + pgbench = node.pgbench(options=['-T', '30', '-c', '1']) + pgbench.wait() + + # delta backup with external directories using new binary + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Merge chain chain with new binary + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # Restore merged backup + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + external_dir1_new = self.get_tblspace_path(node_restored, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node_restored, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1, external_dir1_new), + "--external-mapping={0}={1}".format(external_dir2, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_backward_compatibility_merge_2(self): + """ + take backup with old binary without external dirs support + take delta backup with new binary and 2 external directories + merge delta backup and restore it + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) 
+ self.show_pb(backup_dir) + + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.show_pb(backup_dir) + + node.slow_start() + + node.pgbench_init(scale=3) + + # tmp FULL backup with old binary + tmp_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # FULL backup with old binary without external dirs support + self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + pgbench = node.pgbench(options=['-T', '30', '-c', '1']) + pgbench.wait() + + # delta backup with external directories using new binary + self.backup_node( + backup_dir, 'node', node, + backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgbench = node.pgbench(options=['-T', '30', '-c', '1']) + pgbench.wait() + + # Fill external dirs with changed data + shutil.rmtree(external_dir1, ignore_errors=True) + shutil.rmtree(external_dir2, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, + options=['-j', '4', '--skip-external-dirs']) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, + options=['-j', '4', '--skip-external-dirs']) + + # delta backup without external directories using old binary + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Merge chain using new binary + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # Restore merged backup + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + external_dir1_new = self.get_tblspace_path( + node_restored, 'external_dir1') + external_dir2_new = self.get_tblspace_path( + node_restored, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format( + external_dir1, external_dir1_new), + "--external-mapping={0}={1}".format( + external_dir2, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_merge(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.pgbench_init(scale=3) + + # take temp FULL backup + tmp_id = self.backup_node( + backup_dir, 
'node', node, options=["-j", "4", "--stream"]) + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, backup_id=tmp_id, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, backup_id=tmp_id, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # FULL backup with old binary without external dirs support + self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + # change data a bit + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # delta backup with external directories using new binary + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + print(self.show_pb(backup_dir, 'node', as_json=False, as_text=True)) + + # Merge + print(self.merge_backup(backup_dir, 'node', backup_id=backup_id, + options=['--log-level-file=VERBOSE'])) + + # RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + external_dir1_new = self.get_tblspace_path(node, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format( + external_dir1, external_dir1_new), + "--external-mapping={0}={1}".format( + external_dir2, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_merge_skip_external_dirs(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # FULL backup with old data + tmp_id = self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # fill external directories with old data + self.restore_node( + backup_dir, 'node', node, backup_id=tmp_id, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, backup_id=tmp_id, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # change data a bit + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup with external directories + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + # drop old external data + shutil.rmtree(external_dir1, ignore_errors=True) + 
shutil.rmtree(external_dir2, ignore_errors=True) + + # fill external directories with new data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, + options=["-j", "4", "--skip-external-dirs"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, + options=["-j", "4", "--skip-external-dirs"]) + + # DELTA backup with external directories + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # merge backups without external directories + self.merge_backup( + backup_dir, 'node', + backup_id=backup_id, options=['--skip-external-dirs']) + + # RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + options=["-j", "4"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_merge_1(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # FULL backup + self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup with changed data + backup_id = self.backup_node( + backup_dir, 'node', node, + options=["-j", "4", "--stream"]) + + # fill external directories with changed data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # delta backup with external directories using new binary + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + external_dir1_new = self.get_tblspace_path(node, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1, external_dir1_new), + "--external-mapping={0}={1}".format(external_dir2, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + 
# @unittest.skip("skip") + def test_external_merge_3(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # FULL backup + self.backup_node(backup_dir, 'node', node, options=["-j", "4"]) + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node) + + # fill external directories with changed data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # page backup with external directories + self.backup_node( + backup_dir, 'node', node, backup_type="page", + options=[ + "-j", "4", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + # page backup with external directories + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="page", + options=[ + "-j", "4", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.merge_backup( + backup_dir, 'node', backup_id=backup_id, + options=['--log-level-file=verbose']) + + # RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + external_dir1_new = self.get_tblspace_path(node, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format( + external_dir1, external_dir1_new), + "--external-mapping={0}={1}".format( + external_dir2, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_merge_2(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # FULL backup + self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, + options=["-j", "4", "--stream"]) + + # fill external directories with changed data + self.restore_node( + backup_dir, 'node', node, + 
data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # delta backup with external directories + self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + # delta backup with external directories + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + shutil.rmtree(external_dir1, ignore_errors=True) + shutil.rmtree(external_dir2, ignore_errors=True) + + # delta backup without external directories + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + external_dir1_new = self.get_tblspace_path(node, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1, external_dir1_new), + "--external-mapping={0}={1}".format(external_dir2, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_restore_external_changed_data(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=2) + + # set externals + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # FULL backup + tmp_id = self.backup_node( + backup_dir, 'node', + node, options=["-j", "4", "--stream"]) + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # change data a bit + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + # fill external directories with changed data + shutil.rmtree(external_dir1, ignore_errors=True) + shutil.rmtree(external_dir2, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, backup_id=backup_id, + options=["-j", "4", "--skip-external-dirs"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, backup_id=backup_id, + options=["-j", "4", "--skip-external-dirs"]) + + # change data a bit more + pgbench = 
node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # Delta backup with external directories + self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Restore + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + options=["-j", "4"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_restore_external_changed_data_1(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'max_wal_size': '32MB'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=1) + + # set externals + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # FULL backup + tmp_id = self.backup_node( + backup_dir, 'node', + node, options=["-j", "4", "--stream"]) + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # change data a bit + pgbench = node.pgbench(options=['-T', '5', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + # fill external directories with changed data + shutil.rmtree(external_dir1, ignore_errors=True) + shutil.rmtree(external_dir2, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, backup_id=backup_id, + options=["-j", "4", "--skip-external-dirs"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, backup_id=backup_id, + options=["-j", "4", "--skip-external-dirs"]) + + # change data a bit more + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # Delta backup with only one external directory + self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", external_dir1]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs', 'external_dir2']) + + # Restore + node.cleanup() + shutil.rmtree(node._base_dir) + + # create empty file in external_dir2 + os.mkdir(node._base_dir) + os.mkdir(external_dir2) + with open(os.path.join(external_dir2, 'file'), 'w+') as f: + f.close() + + output = self.restore_node( + backup_dir, 'node', node, + options=["-j", "4"]) + + self.assertNotIn( + 'externaldir2', + output) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs', 'external_dir2']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # 
Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_merge_external_changed_data(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'max_wal_size': '32MB'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=2) + + # set externals + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # FULL backup + tmp_id = self.backup_node( + backup_dir, 'node', + node, options=["-j", "4", "--stream"]) + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # change data a bit + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + # fill external directories with changed data + shutil.rmtree(external_dir1, ignore_errors=True) + shutil.rmtree(external_dir2, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, backup_id=backup_id, + options=["-j", "4", "--skip-external-dirs"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, backup_id=backup_id, + options=["-j", "4", "--skip-external-dirs"]) + + # change data a bit more + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # Delta backup with external directories + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Merge + self.merge_backup(backup_dir, 'node', backup_id) + + # Restore + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + options=["-j", "4"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_restore_skip_external(self): + """ + Check that --skip-external-dirs works correctly + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # temp FULL backup + backup_id = self.backup_node( + 
backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # FULL backup with external directories + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir2)]) + + # delete first externals, so pgdata_compare + # will be capable of detecting redundant + # external files after restore + shutil.rmtree(external_dir1, ignore_errors=True) + shutil.rmtree(external_dir2, ignore_errors=True) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--skip-external-dirs"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_dir_is_symlink(self): + """ + Check that backup works correctly if external dir is symlink, + symlink pointing to external dir should be followed, + but restored as directory + """ + if os.name == 'nt': + return unittest.skip('Skipped for Windows') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + + # temp FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + # fill some directory with data + core_dir = os.path.join(self.tmp_path, module_name, fname) + symlinked_dir = os.path.join(core_dir, 'symlinked') + + self.restore_node( + backup_dir, 'node', node, + data_dir=symlinked_dir, options=["-j", "4"]) + + # drop temp FULL backup + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # create symlink to directory in external directory + os.symlink(symlinked_dir, external_dir) + + # FULL backup with external directories + backup_id = self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + # RESTORE + node_restored.cleanup() + + external_dir_new = self.get_tblspace_path( + node_restored, 'external_dir') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", "--external-mapping={0}={1}".format( + external_dir, external_dir_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + self.assertEqual( + external_dir, + self.show_pb( + backup_dir, 'node', + 
backup_id=backup_id)['external-dirs']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_dir_contain_symlink_on_dir(self): + """ + Check that backup works correctly if external dir is symlink, + symlink pointing to external dir should be followed, + but restored as directory + """ + if os.name == 'nt': + return unittest.skip('Skipped for Windows') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + dir_in_external_dir = os.path.join(external_dir, 'dir') + + # temp FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + # fill some directory with data + core_dir = os.path.join(self.tmp_path, module_name, fname) + symlinked_dir = os.path.join(core_dir, 'symlinked') + + self.restore_node( + backup_dir, 'node', node, + data_dir=symlinked_dir, options=["-j", "4"]) + + # drop temp FULL backup + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # create symlink to directory in external directory + os.mkdir(external_dir) + os.symlink(symlinked_dir, dir_in_external_dir) + + # FULL backup with external directories + backup_id = self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + # RESTORE + node_restored.cleanup() + + external_dir_new = self.get_tblspace_path( + node_restored, 'external_dir') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", "--external-mapping={0}={1}".format( + external_dir, external_dir_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + self.assertEqual( + external_dir, + self.show_pb( + backup_dir, 'node', + backup_id=backup_id)['external-dirs']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_dir_contain_symlink_on_file(self): + """ + Check that backup works correctly if external dir is symlink, + symlink pointing to external dir should be followed, + but restored as directory + """ + if os.name == 'nt': + return unittest.skip('Skipped for Windows') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + file_in_external_dir = os.path.join(external_dir, 'file') + + # temp FULL backup + backup_id = self.backup_node( + backup_dir, 'node', 
node, options=["-j", "4", "--stream"]) + + # fill some directory with data + core_dir = os.path.join(self.tmp_path, module_name, fname) + symlinked_dir = os.path.join(core_dir, 'symlinked') + + self.restore_node( + backup_dir, 'node', node, + data_dir=symlinked_dir, options=["-j", "4"]) + + # drop temp FULL backup + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # create symlink to directory in external directory + src_file = os.path.join(symlinked_dir, 'postgresql.conf') + os.mkdir(external_dir) + os.chmod(external_dir, 0700) + os.symlink(src_file, file_in_external_dir) + + # FULL backup with external directories + backup_id = self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + # RESTORE + node_restored.cleanup() + + external_dir_new = self.get_tblspace_path( + node_restored, 'external_dir') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", "--external-mapping={0}={1}".format( + external_dir, external_dir_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + self.assertEqual( + external_dir, + self.show_pb( + backup_dir, 'node', + backup_id=backup_id)['external-dirs']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_dir_is_tablespace(self): + """ + Check that backup fails with error + if external directory points to tablespace + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + + self.create_tblspace_in_node( + node, 'tblspace1', tblspc_path=external_dir) + + node.pgbench_init(scale=1, tablespace='tblspace1') + + # FULL backup with external directories + try: + backup_id = self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because external dir points to the tablespace" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'External directory path (-E option)', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_restore_external_dir_not_empty(self): + """ + Check that backup fails with error + if external directory point to not empty tablespace and + if remapped directory also isn`t empty + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + 
base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + + # create empty file in external directory + # open(os.path.join(external_dir, 'file'), 'a').close() + os.mkdir(external_dir) + with open(os.path.join(external_dir, 'file'), 'w+') as f: + f.close() + + # FULL backup with external directory + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + + node.cleanup() + + try: + self.restore_node(backup_dir, 'node', node) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because external dir is not empty" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'External directory is not empty', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + external_dir_new = self.get_tblspace_path(node, 'external_dir_new') + + # create empty file in directory, which will be a target of + # remapping + os.mkdir(external_dir_new) + with open(os.path.join(external_dir_new, 'file1'), 'w+') as f: + f.close() + + try: + self.restore_node( + backup_dir, 'node', node, + options=['--external-mapping={0}={1}'.format( + external_dir, external_dir_new)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because remapped external dir is not empty" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'External directory is not empty', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_restore_external_dir_is_missing(self): + """ + take FULL backup with not empty external directory + delete external directory + take DELTA backup with external directory, which + should fail + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + + # create empty file in external directory + # open(os.path.join(external_dir, 'file'), 'a').close() + os.mkdir(external_dir) + with open(os.path.join(external_dir, 'file'), 'w+') as f: + f.close() + + # FULL backup with external directory + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + + # drop external directory + shutil.rmtree(external_dir, ignore_errors=True) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because external dir is missing" + "\n Output: {0} \n CMD: 
{1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: External directory is not found:', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + # take DELTA without external directories + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=["-j", "4", "--stream"]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Restore Delta backup + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_merge_external_dir_is_missing(self): + """ + take FULL backup with not empty external directory + delete external directory + take DELTA backup with external directory, which + should fail, + take DELTA backup without external directory, + merge it into FULL, restore and check + data correctness + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + + # create empty file in external directory + # open(os.path.join(external_dir, 'file'), 'a').close() + os.mkdir(external_dir) + with open(os.path.join(external_dir, 'file'), 'w+') as f: + f.close() + + # FULL backup with external directory + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + + # drop external directory + shutil.rmtree(external_dir, ignore_errors=True) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because external dir is missing" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: External directory is not found:', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + sleep(1) + + # take DELTA without external directories + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=["-j", "4", "--stream"]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Merge + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # Restore + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_restore_external_dir_is_empty(self): + """ + take FULL backup with not empty external directory + drop external directory content + take DELTA backup with the same external directory + restore DELRA backup, 
check that restored + external directory is empty + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + + # create empty file in external directory + # open(os.path.join(external_dir, 'file'), 'a').close() + os.mkdir(external_dir) + os.chmod(external_dir, 0700) + with open(os.path.join(external_dir, 'file'), 'w+') as f: + f.close() + + # FULL backup with external directory + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + + # make external directory empty + os.remove(os.path.join(external_dir, 'file')) + + # take DELTA backup with empty external directory + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Restore Delta backup + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_merge_external_dir_is_empty(self): + """ + take FULL backup with not empty external directory + drop external directory content + take DELTA backup with the same external directory + merge backups and restore FULL, check that restored + external directory is empty + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + + # create empty file in external directory + # open(os.path.join(external_dir, 'file'), 'a').close() + os.mkdir(external_dir) + os.chmod(external_dir, 0700) + with open(os.path.join(external_dir, 'file'), 'w+') as f: + f.close() + + # FULL backup with external directory + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + + # make external directory empty + os.remove(os.path.join(external_dir, 'file')) + + # take DELTA backup with empty external directory + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=[ + "-j", "4", "--stream", + "-E", external_dir]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Merge + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # Restore + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + 
self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_restore_external_dir_string_order(self): + """ + take FULL backup with not empty external directory + drop external directory content + take DELTA backup with the same external directory + restore DELRA backup, check that restored + external directory is empty + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir_1 = self.get_tblspace_path(node, 'external_dir_1') + external_dir_2 = self.get_tblspace_path(node, 'external_dir_2') + + # create empty file in external directory + os.mkdir(external_dir_1) + os.chmod(external_dir_1, 0700) + with open(os.path.join(external_dir_1, 'fileA'), 'w+') as f: + f.close() + + os.mkdir(external_dir_2) + os.chmod(external_dir_2, 0700) + with open(os.path.join(external_dir_2, 'fileZ'), 'w+') as f: + f.close() + + # FULL backup with external directory + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir_1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir_2)]) + + with open(os.path.join(external_dir_1, 'fileB'), 'w+') as f: + f.close() + + with open(os.path.join(external_dir_2, 'fileY'), 'w+') as f: + f.close() + + # take DELTA backup and swap external_dir_2 and external_dir_1 + # in external_dir_str + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir_2, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir_1)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Restore Delta backup + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_merge_external_dir_string_order(self): + """ + take FULL backup with not empty external directory + drop external directory content + take DELTA backup with the same external directory + restore DELRA backup, check that restored + external directory is empty + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir_1 = self.get_tblspace_path(node, 'external_dir_1') + external_dir_2 = self.get_tblspace_path(node, 'external_dir_2') + + # create empty file in external directory + os.mkdir(external_dir_1) + os.chmod(external_dir_1, 0700) + with open(os.path.join(external_dir_1, 'fileA'), 'w+') as f: + f.close() + + 
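+ # second external directory, filled the same way; the test later swaps the -E order between FULL and DELTA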
os.mkdir(external_dir_2) + os.chmod(external_dir_2, 0700) + with open(os.path.join(external_dir_2, 'fileZ'), 'w+') as f: + f.close() + + # FULL backup with external directory + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir_1, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir_2)]) + + with open(os.path.join(external_dir_1, 'fileB'), 'w+') as f: + f.close() + + with open(os.path.join(external_dir_2, 'fileY'), 'w+') as f: + f.close() + + # take DELTA backup and swap external_dir_2 and external_dir_1 + # in external_dir_str + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=[ + "-j", "4", "--stream", + "-E", "{0}{1}{2}".format( + external_dir_2, + self.EXTERNAL_DIRECTORY_DELIMITER, + external_dir_1)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Merge backups + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # Restore + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_smart_restore_externals(self): + """ + make node, create database, take full backup with externals, + take incremental backup without externals and restore it, + make sure that files from externals are not copied during restore + https://github.com/postgrespro/pg_probackup/issues/63 + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # fill external directories with data + tmp_id = self.backup_node(backup_dir, 'node', node) + + external_dir_1 = self.get_tblspace_path(node, 'external_dir_1') + external_dir_2 = self.get_tblspace_path(node, 'external_dir_2') + + self.restore_node( + backup_dir, 'node', node, backup_id=tmp_id, + data_dir=external_dir_1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, backup_id=tmp_id, + data_dir=external_dir_2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # create database + node.safe_psql( + "postgres", + "CREATE DATABASE testdb") + + # take FULL backup + full_id = self.backup_node(backup_dir, 'node', node) + + # drop database + node.safe_psql( + "postgres", + "DROP DATABASE testdb") + + # take PAGE backup + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # restore PAGE backup + node.cleanup() + self.restore_node( + backup_dir, 'node', node, backup_id=page_id, + options=['--no-validate', '--log-level-file=VERBOSE']) + + logfile = os.path.join(backup_dir, 'log', 'pg_probackup.log') + with open(logfile, 'r') as f: + logfile_content = f.read() + + # get delta between FULL and PAGE filelists + filelist_full = self.get_backup_filelist( + backup_dir, 'node', full_id) + + filelist_page = self.get_backup_filelist( + backup_dir, 'node', page_id) + + filelist_diff = self.get_backup_filelist_diff( + filelist_full, filelist_page) + + for file in filelist_diff: + 
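+ # files that exist only in the FULL backup must not be mentioned in the VERBOSE restore log, i.e. they were not copied during restore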
self.assertNotIn(file, logfile_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_external_validation(self): + """ + make node, create database, + take full backup with external directory, + corrupt external file in backup, + run validate which should fail + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # take temp FULL backup + tmp_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + external_dir = self.get_tblspace_path(node, 'external_dir') + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, backup_id=tmp_id, + data_dir=external_dir, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # take FULL backup + full_id = self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', '-E', "{0}".format(external_dir)]) + + # Corrupt file + file = os.path.join( + backup_dir, 'backups', 'node', full_id, + 'external_directories', 'externaldir1', 'postgresql.auto.conf') + + with open(file, "r+b", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + try: + self.validate_pb(backup_dir) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because file in external dir is corrupted" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Invalid CRC of backup file', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'CORRUPT', + self.show_pb(backup_dir, 'node', full_id)['status'], + 'Backup STATUS should be "CORRUPT"') + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/false_positive.py b/tests/false_positive.py index 1884159b2..fc9ee4b62 100644 --- a/tests/false_positive.py +++ b/tests/false_positive.py @@ -13,32 +13,25 @@ class FalsePositive(ProbackupTest, unittest.TestCase): # @unittest.skip("skip") @unittest.expectedFailure def test_validate_wal_lost_segment(self): - """Loose segment located between backups. ExpectedFailure. This is BUG """ + """ + Loose segment located between backups. ExpectedFailure. 
This is BUG + """ fname = self.id().split('.')[3] node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'max_wal_senders': '2'} - ) + initdb_params=['--data-checksums']) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() self.backup_node(backup_dir, 'node', node) # make some wals - node.pgbench_init(scale=2) - pgbench = node.pgbench( - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - options=["-c", "4", "-T", "10"] - ) - pgbench.wait() - pgbench.stdout.close() + node.pgbench_init(scale=5) # delete last wal segment wals_dir = os.path.join(backup_dir, "wal", 'node') @@ -63,15 +56,14 @@ def test_incremental_backup_corrupt_full_1(self): """page-level backup with corrupted full backup""" fname = self.id().split('.')[3] node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on'} - ) + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() backup_id = self.backup_node(backup_dir, 'node', node) file = os.path.join( @@ -117,27 +109,29 @@ def test_incremental_backup_corrupt_full_1(self): # Clean after yourself self.del_test_dir(module_name, fname) - # @unittest.skip("skip") @unittest.expectedFailure def test_ptrack_concurrent_get_and_clear_1(self): """make node, make full and ptrack stream backups," " restore them and check data correctness""" + + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + if self.pg_config_version > self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL =< 11 for this test') + fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', - 'ptrack_enable': 'on' - } - ) + ptrack_enable=True, + initdb_params=['--data-checksums']) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() node.safe_psql( "postgres", @@ -148,7 +142,7 @@ def test_ptrack_concurrent_get_and_clear_1(self): self.backup_node(backup_dir, 'node', node, options=['--stream']) gdb = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'], + options=['--stream'], gdb=True ) @@ -195,33 +189,34 @@ def test_ptrack_concurrent_get_and_clear_1(self): # Logical comparison self.assertEqual( result, - node.safe_psql("postgres", "SELECT * FROM t_heap") - ) + node.safe_psql("postgres", "SELECT * FROM t_heap")) # Clean after yourself self.del_test_dir(module_name, fname) - # @unittest.skip("skip") @unittest.expectedFailure def test_ptrack_concurrent_get_and_clear_2(self): """make node, make full and ptrack stream backups," " 
restore them and check data correctness""" + + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + if self.pg_config_version > self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL =< 11 for this test') + fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', - 'ptrack_enable': 'on' - } - ) + ptrack_enable=True, + initdb_params=['--data-checksums']) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() node.safe_psql( "postgres", @@ -232,7 +227,7 @@ def test_ptrack_concurrent_get_and_clear_2(self): self.backup_node(backup_dir, 'node', node, options=['--stream']) gdb = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'], + options=['--stream'], gdb=True ) @@ -302,32 +297,254 @@ def test_ptrack_concurrent_get_and_clear_2(self): # @unittest.skip("skip") @unittest.expectedFailure - def test_multiple_delete(self): - """delete multiple backups""" + def test_pg_10_waldir(self): + """ + test group access for PG >= 11 + """ + if self.pg_config_version < self.version_to_num('10.0'): + return unittest.skip('You need PostgreSQL >= 10 for this test') + + fname = self.id().split('.')[3] + wal_dir = os.path.join( + os.path.join(self.tmp_path, module_name, fname), 'wal_dir') + shutil.rmtree(wal_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=[ + '--data-checksums', + '--waldir={0}'.format(wal_dir)]) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # take FULL backup + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + # restore backup + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored) + + # compare pgdata permissions + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + self.assertTrue( + os.path.islink(os.path.join(node_restored.data_dir, 'pg_wal')), + 'pg_wal should be symlink') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.expectedFailure + # @unittest.skip("skip") + def test_recovery_target_time_backup_victim(self): + """ + Check that for validation to recovery target + probackup chooses valid backup + https://github.com/postgrespro/pg_probackup/issues/104 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( 
+ "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + + target_time = node.safe_psql( + "postgres", + "select now()").rstrip() + + node.safe_psql( + "postgres", + "create table t_heap1 as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,100) i") + + gdb = self.backup_node(backup_dir, 'node', node, gdb=True) + + gdb.set_breakpoint('pg_stop_backup') + gdb.run_until_break() + gdb.remove_all_breakpoints() + gdb._execute('signal SIGINT') + gdb.continue_execution_until_error() + + backup_id = self.show_pb(backup_dir, 'node')[1]['id'] + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node', backup_id)['status'], + 'Backup STATUS should be "ERROR"') + + self.validate_pb( + backup_dir, 'node', + options=['--recovery-target-time={0}'.format(target_time)]) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.expectedFailure + # @unittest.skip("skip") + def test_recovery_target_lsn_backup_victim(self): + """ + Check that for validation to recovery target + probackup chooses valid backup + https://github.com/postgrespro/pg_probackup/issues/104 + """ fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() + + # FULL backup + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") node.safe_psql( "postgres", - "create table t_heap as select 1 as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - # first full backup - backup_1_id = self.backup_node(backup_dir, 'node', node) - # second full backup - backup_2_id = self.backup_node(backup_dir, 'node', node) - # third full backup - backup_3_id = self.backup_node(backup_dir, 'node', node) - node.stop() - - self.delete_pb(backup_dir, 'node', options= - ["-i {0}".format(backup_1_id), "-i {0}".format(backup_2_id), "-i {0}".format(backup_3_id)]) + "create table t_heap1 as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,100) i") + + gdb = self.backup_node( + backup_dir, 'node', node, + options=['--log-level-console=LOG'], gdb=True) + + gdb.set_breakpoint('pg_stop_backup') + gdb.run_until_break() + gdb.remove_all_breakpoints() + gdb._execute('signal SIGINT') + gdb.continue_execution_until_error() + + backup_id = self.show_pb(backup_dir, 'node')[1]['id'] + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node', backup_id)['status'], + 'Backup STATUS should be "ERROR"') + + self.switch_wal_segment(node) + + target_lsn = self.show_pb(backup_dir, 'node', backup_id)['start-lsn'] + + self.validate_pb( + backup_dir, 'node', + options=['--recovery-target-lsn={0}'.format(target_lsn)]) + + # Clean after yourself + 
self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + @unittest.expectedFailure + def test_streaming_timeout(self): + """ + Illustrate the problem of loosing exact error + message because our WAL streaming engine is "borrowed" + from pg_receivexlog + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '1h', + 'wal_sender_timeout': '5s'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + gdb = self.backup_node( + backup_dir, 'node', node, gdb=True, + options=['--stream', '--log-level-file=LOG']) + + gdb.set_breakpoint('pg_stop_backup') + gdb.run_until_break() + + sleep(10) + gdb.continue_execution_until_error() + gdb._execute('detach') + sleep(2) + + log_file_path = os.path.join(backup_dir, 'log', 'pg_probackup.log') + with open(log_file_path) as f: + log_content = f.read() + + self.assertIn( + 'could not receive data from WAL stream', + log_content) + + self.assertIn( + 'ERROR: Problem in receivexlog', + log_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + @unittest.expectedFailure + def test_validate_all_empty_catalog(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + + try: + self.validate_pb(backup_dir) + self.assertEqual( + 1, 0, + "Expecting Error because backup_dir is empty.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: This backup catalog contains no backup instances', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) # Clean after yourself self.del_test_dir(module_name, fname) diff --git a/tests/helpers/ptrack_helpers.py b/tests/helpers/ptrack_helpers.py index 0d04d8983..19d399d4b 100644 --- a/tests/helpers/ptrack_helpers.py +++ b/tests/helpers/ptrack_helpers.py @@ -7,9 +7,8 @@ import testgres import hashlib import re -import pwd +import getpass import select -import psycopg2 from time import sleep import re import json @@ -48,21 +47,18 @@ 'column': 'tsvector', 'relation': 't_heap' }, + 't_hash': { + 'type': 'hash', + 'column': 'id', + 'relation': 't_heap' + }, + 't_bloom': { + 'type': 'bloom', + 'column': 'id', + 'relation': 't_heap' + } } -archive_script = """ -#!/bin/bash -count=$(ls {backup_dir}/test00* | wc -l) -if [ $count -ge {count_limit} ] -then - exit 1 -else - cp $1 {backup_dir}/wal/{node_name}/$2 - count=$((count+1)) - touch {backup_dir}/test00$count - exit 0 -fi -""" warning = """ Wrong splint in show_pb Original Header: @@ -92,8 +88,14 @@ def dir_files(base_dir): def is_enterprise(): # pg_config --help + if os.name == 'posix': + cmd = [os.environ['PG_CONFIG'], '--help'] + + elif os.name == 'nt': + cmd = [[os.environ['PG_CONFIG']], ['--help']] + p = subprocess.Popen( - [os.environ['PG_CONFIG'], '--help'], + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) @@ -115,34 +117,29 @@ def __str__(self): def slow_start(self, replica=False): # wait for https://github.com/postgrespro/testgres/pull/50 - # 
self.poll_query_until( - # "postgres", - # "SELECT not pg_is_in_recovery()", - # raise_operational_error=False) +# self.start() +# self.poll_query_until( +# "postgres", +# "SELECT not pg_is_in_recovery()", +# suppress={testgres.NodeConnection}) + if replica: + query = 'SELECT pg_is_in_recovery()' + else: + query = 'SELECT not pg_is_in_recovery()' self.start() - if not replica: - while True: - try: - self.poll_query_until( - "postgres", - "SELECT not pg_is_in_recovery()") + while True: + try: + output = self.safe_psql('template1', query).rstrip() + + if output == 't': break - except Exception as e: - continue - else: - self.poll_query_until( - "postgres", - "SELECT pg_is_in_recovery()") -# while True: -# try: -# self.poll_query_until( -# "postgres", -# "SELECT pg_is_in_recovery()") -# break -# except ProbackupException as e: -# continue + except testgres.QueryException as e: + if 'database system is starting up' in e[0]: + continue + else: + raise e class ProbackupTest(object): @@ -158,18 +155,18 @@ def __init__(self, *args, **kwargs): self.test_env = os.environ.copy() envs_list = [ - "LANGUAGE", - "LC_ALL", - "PGCONNECT_TIMEOUT", - "PGDATA", - "PGDATABASE", - "PGHOSTADDR", - "PGREQUIRESSL", - "PGSERVICE", - "PGSSLMODE", - "PGUSER", - "PGPORT", - "PGHOST" + 'LANGUAGE', + 'LC_ALL', + 'PGCONNECT_TIMEOUT', + 'PGDATA', + 'PGDATABASE', + 'PGHOSTADDR', + 'PGREQUIRESSL', + 'PGSERVICE', + 'PGSSLMODE', + 'PGUSER', + 'PGPORT', + 'PGHOST' ] for e in envs_list: @@ -178,8 +175,8 @@ def __init__(self, *args, **kwargs): except: pass - self.test_env["LC_MESSAGES"] = "C" - self.test_env["LC_TIME"] = "C" + self.test_env['LC_MESSAGES'] = 'C' + self.test_env['LC_TIME'] = 'C' self.paranoia = False if 'PG_PROBACKUP_PARANOIA' in self.test_env: @@ -213,7 +210,7 @@ def __init__(self, *args, **kwargs): self.user = self.get_username() self.probackup_path = None - if "PGPROBACKUPBIN" in self.test_env: + if 'PGPROBACKUPBIN' in self.test_env: if ( os.path.isfile(self.test_env["PGPROBACKUPBIN"]) and os.access(self.test_env["PGPROBACKUPBIN"], os.X_OK) @@ -221,15 +218,112 @@ def __init__(self, *args, **kwargs): self.probackup_path = self.test_env["PGPROBACKUPBIN"] else: if self.verbose: - print('PGPROBINDIR is not an executable file') + print('PGPROBACKUPBIN is not an executable file') + + if not self.probackup_path: + probackup_path_tmp = os.path.join( + testgres.get_pg_config()['BINDIR'], 'pg_probackup') + + if os.path.isfile(probackup_path_tmp): + if not os.access(probackup_path_tmp, os.X_OK): + print('{0} is not an executable file'.format( + probackup_path_tmp)) + else: + self.probackup_path = probackup_path_tmp + if not self.probackup_path: - self.probackup_path = os.path.abspath(os.path.join( - self.dir_path, "../pg_probackup")) + probackup_path_tmp = os.path.abspath(os.path.join( + self.dir_path, '../pg_probackup')) + + if os.path.isfile(probackup_path_tmp): + if not os.access(probackup_path_tmp, os.X_OK): + print('{0} is not an executable file'.format( + probackup_path_tmp)) + else: + self.probackup_path = probackup_path_tmp + + if not self.probackup_path: + print('pg_probackup binary is not found') + exit(1) + + self.probackup_version = None + + try: + self.probackup_version_output = subprocess.check_output( + [self.probackup_path, "--version"], + stderr=subprocess.STDOUT, + ).decode('utf-8') + except subprocess.CalledProcessError as e: + raise ProbackupException(e.output.decode('utf-8')) + + self.probackup_version = re.search(r"\d+\.\d+\.\d+", self.probackup_version_output).group(0) + + if os.name == 'posix': + 
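+ # external directory delimiter mirrors the platform PATH separator: ':' on POSIX, ';' on Windows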
self.EXTERNAL_DIRECTORY_DELIMITER = ':' + os.environ['PATH'] = os.path.dirname( + self.probackup_path) + ':' + os.environ['PATH'] + + elif os.name == 'nt': + self.EXTERNAL_DIRECTORY_DELIMITER = ';' + os.environ['PATH'] = os.path.dirname( + self.probackup_path) + ';' + os.environ['PATH'] + + self.probackup_old_path = None + + if 'PGPROBACKUPBIN_OLD' in self.test_env: + if ( + os.path.isfile(self.test_env['PGPROBACKUPBIN_OLD']) and + os.access(self.test_env['PGPROBACKUPBIN_OLD'], os.X_OK) + ): + self.probackup_old_path = self.test_env['PGPROBACKUPBIN_OLD'] + else: + if self.verbose: + print('PGPROBACKUPBIN_OLD is not an executable file') + + self.remote = False + self.remote_host = None + self.remote_port = None + self.remote_user = None + + if 'PGPROBACKUP_SSH_REMOTE' in self.test_env: + if self.test_env['PGPROBACKUP_SSH_REMOTE'] == 'ON': + self.remote = True + + self.ptrack = False + if 'PG_PROBACKUP_PTRACK' in self.test_env: + if self.test_env['PG_PROBACKUP_PTRACK'] == 'ON': + self.ptrack = True + + os.environ["PGAPPNAME"] = "pg_probackup" + + @property + def pg_config_version(self): + return self.version_to_num( + testgres.get_pg_config()['VERSION'].split(" ")[1]) + +# if 'PGPROBACKUP_SSH_HOST' in self.test_env: +# self.remote_host = self.test_env['PGPROBACKUP_SSH_HOST'] +# else +# print('PGPROBACKUP_SSH_HOST is not set') +# exit(1) +# +# if 'PGPROBACKUP_SSH_PORT' in self.test_env: +# self.remote_port = self.test_env['PGPROBACKUP_SSH_PORT'] +# else +# print('PGPROBACKUP_SSH_PORT is not set') +# exit(1) +# +# if 'PGPROBACKUP_SSH_USER' in self.test_env: +# self.remote_user = self.test_env['PGPROBACKUP_SSH_USER'] +# else +# print('PGPROBACKUP_SSH_USER is not set') +# exit(1) def make_simple_node( self, base_dir=None, set_replication=False, + ptrack_enable=False, initdb_params=[], pg_options={}): @@ -244,41 +338,51 @@ def make_simple_node( node.init( initdb_params=initdb_params, allow_streaming=set_replication) - # Sane default parameters - node.append_conf("postgresql.auto.conf", "max_connections = 100") - node.append_conf("postgresql.auto.conf", "shared_buffers = 10MB") - node.append_conf("postgresql.auto.conf", "fsync = on") - node.append_conf("postgresql.auto.conf", "wal_level = logical") - node.append_conf("postgresql.auto.conf", "hot_standby = 'off'") - - node.append_conf( - "postgresql.auto.conf", "log_line_prefix = '%t [%p]: [%l-1] '") - node.append_conf("postgresql.auto.conf", "log_statement = none") - node.append_conf("postgresql.auto.conf", "log_duration = on") - node.append_conf( - "postgresql.auto.conf", "log_min_duration_statement = 0") - node.append_conf("postgresql.auto.conf", "log_connections = on") - node.append_conf("postgresql.auto.conf", "log_disconnections = on") + # set major version + with open(os.path.join(node.data_dir, 'PG_VERSION')) as f: + node.major_version_str = str(f.read().rstrip()) + node.major_version = float(node.major_version_str) - # Apply given parameters - for key, value in six.iteritems(pg_options): - node.append_conf("postgresql.auto.conf", "%s = %s" % (key, value)) + # Sane default parameters + options = {} + options['max_connections'] = 100 + options['shared_buffers'] = '10MB' + options['fsync'] = 'off' + + options['wal_level'] = 'logical' + options['hot_standby'] = 'off' + + options['log_line_prefix'] = '%t [%p]: [%l-1] ' + options['log_statement'] = 'none' + options['log_duration'] = 'on' + options['log_min_duration_statement'] = 0 + options['log_connections'] = 'on' + options['log_disconnections'] = 'on' + options['restart_after_crash'] = 'off' 
# Allow replication in pg_hba.conf if set_replication: - node.append_conf( - "pg_hba.conf", - "local replication all trust\n") - node.append_conf( - "postgresql.auto.conf", - "max_wal_senders = 10") + options['max_wal_senders'] = 10 + + if ptrack_enable: + if node.major_version > 11: + options['ptrack.map_size'] = '128' + options['shared_preload_libraries'] = 'ptrack' + else: + options['ptrack_enable'] = 'on' + + # set default values + self.set_auto_conf(node, options) + + # Apply given parameters + self.set_auto_conf(node, pg_options) return node def create_tblspace_in_node(self, node, tblspc_name, tblspc_path=None, cfs=False): res = node.execute( - "postgres", - "select exists" + 'postgres', + 'select exists' " (select 1 from pg_tablespace where spcname = '{0}')".format( tblspc_name) ) @@ -294,11 +398,11 @@ def create_tblspace_in_node(self, node, tblspc_name, tblspc_path=None, cfs=False cmd = "CREATE TABLESPACE {0} LOCATION '{1}'".format( tblspc_name, tblspc_path) if cfs: - cmd += " with (compression=true)" + cmd += ' with (compression=true)' if not os.path.exists(tblspc_path): os.makedirs(tblspc_path) - res = node.safe_psql("postgres", cmd) + res = node.safe_psql('postgres', cmd) # Check that tablespace was successfully created # self.assertEqual( # res[0], 0, @@ -309,13 +413,13 @@ def get_tblspace_path(self, node, tblspc_name): def get_fork_size(self, node, fork_name): return node.execute( - "postgres", + 'postgres', "select pg_relation_size('{0}')/8192".format(fork_name))[0][0] def get_fork_path(self, node, fork_name): return os.path.join( node.base_dir, 'data', node.execute( - "postgres", + 'postgres', "select pg_relation_filepath('{0}')".format( fork_name))[0][0] ) @@ -343,7 +447,7 @@ def get_md5_per_page_for_fork(self, file, size_in_pages): end_page = pages_per_segment[segment_number] else: file_desc = os.open( - file+".{0}".format(segment_number), os.O_RDONLY + file+'.{0}'.format(segment_number), os.O_RDONLY ) start_page = max(md5_per_page)+1 end_page = end_page + pages_per_segment[segment_number] @@ -360,12 +464,20 @@ def get_md5_per_page_for_fork(self, file, size_in_pages): def get_ptrack_bits_per_page_for_fork(self, node, file, size=[]): if self.get_pgpro_edition(node) == 'enterprise': - header_size = 48 + if self.get_version(node) < self.version_to_num('10.0'): + header_size = 48 + else: + header_size = 24 else: header_size = 24 ptrack_bits_for_fork = [] + # TODO: use macro instead of hard coded 8KB page_body_size = 8192-header_size + # Check that if main fork file size is 0, it`s ok + # to not having a _ptrack fork + if os.path.getsize(file) == 0: + return ptrack_bits_for_fork byte_size = os.path.getsize(file + '_ptrack') npages = byte_size/8192 if byte_size % 8192 != 0: @@ -392,6 +504,31 @@ def get_ptrack_bits_per_page_for_fork(self, node, file, size=[]): os.close(file) return ptrack_bits_for_fork + def check_ptrack_map_sanity(self, node, idx_ptrack): + if node.major_version >= 12: + return + + success = True + for i in idx_ptrack: + # get new size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['new_size'] = self.get_fork_size(node, i) + # update path to heap and index files in case they`ve changed + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + # calculate new md5sums for pages + idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) + # get ptrack for every idx + idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + node, idx_ptrack[i]['path'], + [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) + + # compare pages and check ptrack sanity + if not self.check_ptrack_sanity(idx_ptrack[i]): + success = False + + self.assertTrue( + success, 'Ptrack has failed to register changes in data files') + def check_ptrack_sanity(self, idx_dict): success = True if idx_dict['new_size'] > idx_dict['old_size']: @@ -405,8 +542,9 @@ def check_ptrack_sanity(self, idx_dict): if idx_dict['ptrack'][PageNum] != 1: if self.verbose: print( - 'Page Number {0} of type {1} was added,' - ' but ptrack value is {2}. THIS IS BAD'.format( + 'File: {0}\n Page Number {1} of type {2} was added,' + ' but ptrack value is {3}. THIS IS BAD'.format( + idx_dict['path'], PageNum, idx_dict['type'], idx_dict['ptrack'][PageNum]) ) @@ -415,13 +553,14 @@ def check_ptrack_sanity(self, idx_dict): continue if PageNum not in idx_dict['new_pages']: # Page is not present now, meaning that relation got smaller - # Ptrack should be equal to 0, + # Ptrack should be equal to 1, # We are not freaking out about false positive stuff - if idx_dict['ptrack'][PageNum] != 0: + if idx_dict['ptrack'][PageNum] != 1: if self.verbose: print( - 'Page Number {0} of type {1} was deleted,' - ' but ptrack value is {2}'.format( + 'File: {0}\n Page Number {1} of type {2} was deleted,' + ' but ptrack value is {3}. THIS IS BAD'.format( + idx_dict['path'], PageNum, idx_dict['type'], idx_dict['ptrack'][PageNum]) ) @@ -437,14 +576,15 @@ def check_ptrack_sanity(self, idx_dict): if idx_dict['ptrack'][PageNum] != 1: if self.verbose: print( - 'Page Number {0} of type {1} was changed,' - ' but ptrack value is {2}. THIS IS BAD'.format( + 'File: {0}\n Page Number {1} of type {2} was changed,' + ' but ptrack value is {3}. 
THIS IS BAD'.format( + idx_dict['path'], PageNum, idx_dict['type'], idx_dict['ptrack'][PageNum]) ) print( - "\n Old checksumm: {0}\n" - " New checksumm: {1}".format( + ' Old checksumm: {0}\n' + ' New checksumm: {1}'.format( idx_dict['old_pages'][PageNum], idx_dict['new_pages'][PageNum]) ) @@ -463,19 +603,52 @@ def check_ptrack_sanity(self, idx_dict): if idx_dict['ptrack'][PageNum] != 0: if self.verbose: print( - 'Page Number {0} of type {1} was not changed,' - ' but ptrack value is {2}'.format( + 'File: {0}\n Page Number {1} of type {2} was not changed,' + ' but ptrack value is {3}'.format( + idx_dict['path'], PageNum, idx_dict['type'], idx_dict['ptrack'][PageNum] ) ) + return success + # self.assertTrue( + # success, 'Ptrack has failed to register changes in data files' + # ) - self.assertTrue( - success, 'Ptrack does not correspond to state' - ' of its own pages.\n Gory Details: \n{0}'.format( - idx_dict['type'], idx_dict - ) - ) + def get_backup_filelist(self, backup_dir, instance, backup_id): + + filelist_path = os.path.join( + backup_dir, 'backups', + instance, backup_id, 'backup_content.control') + + with open(filelist_path, 'r') as f: + filelist_raw = f.read() + + filelist_splitted = filelist_raw.splitlines() + + filelist = {} + for line in filelist_splitted: + line = json.loads(line) + filelist[line['path']] = line + + return filelist + + # return dict of files from filelist A, + # which are not exists in filelist_B + def get_backup_filelist_diff(self, filelist_A, filelist_B): + + filelist_diff = {} + for file in filelist_A: + if file not in filelist_B: + filelist_diff[file] = filelist_A[file] + + return filelist_diff + + # used for partial restore + def truncate_every_file_in_dir(self, path): + for file in os.listdir(path): + with open(os.path.join(path, file), "w") as f: + f.close() def check_ptrack_recovery(self, idx_dict): size = idx_dict['size'] @@ -507,14 +680,23 @@ def check_ptrack_clean(self, idx_dict, size): ) ) - def run_pb(self, command, async=False, gdb=False): + def run_pb(self, command, asynchronous=False, gdb=False, old_binary=False, return_id=True): + if not self.probackup_old_path and old_binary: + print('PGPROBACKUPBIN_OLD is not set') + exit(1) + + if old_binary: + binary_path = self.probackup_old_path + else: + binary_path = self.probackup_path + try: - self.cmd = [' '.join(map(str, [self.probackup_path] + command))] + self.cmd = [' '.join(map(str, [binary_path] + command))] if self.verbose: print(self.cmd) if gdb: - return GDBobj([self.probackup_path] + command, self.verbose) - if async: + return GDBobj([binary_path] + command, self.verbose) + if asynchronous: return subprocess.Popen( self.cmd, stdout=subprocess.PIPE, @@ -523,11 +705,11 @@ def run_pb(self, command, async=False, gdb=False): ) else: self.output = subprocess.check_output( - [self.probackup_path] + command, + [binary_path] + command, stderr=subprocess.STDOUT, env=self.test_env - ).decode("utf-8") - if command[0] == 'backup': + ).decode('utf-8') + if command[0] == 'backup' and return_id: # return backup ID for line in self.output.splitlines(): if 'INFO: Backup' and 'completed' in line: @@ -535,13 +717,13 @@ def run_pb(self, command, async=False, gdb=False): else: return self.output except subprocess.CalledProcessError as e: - raise ProbackupException(e.output.decode("utf-8"), self.cmd) + raise ProbackupException(e.output.decode('utf-8'), self.cmd) - def run_binary(self, command, async=False): + def run_binary(self, command, asynchronous=False): if self.verbose: print([' '.join(map(str, command))]) 
try: - if async: + if asynchronous: return subprocess.Popen( command, stdin=subprocess.PIPE, @@ -554,121 +736,211 @@ def run_binary(self, command, async=False): command, stderr=subprocess.STDOUT, env=self.test_env - ).decode("utf-8") + ).decode('utf-8') return self.output except subprocess.CalledProcessError as e: - raise ProbackupException(e.output.decode("utf-8"), command) + raise ProbackupException(e.output.decode('utf-8'), command) - def init_pb(self, backup_dir): + def init_pb(self, backup_dir, options=[], old_binary=False): shutil.rmtree(backup_dir, ignore_errors=True) - return self.run_pb([ - "init", - "-B", backup_dir - ]) - def add_instance(self, backup_dir, instance, node): + # don`t forget to kill old_binary after remote ssh release + if self.remote and not old_binary: + options = options + [ + '--remote-proto=ssh', + '--remote-host=localhost'] return self.run_pb([ - "add-instance", - "--instance={0}".format(instance), - "-B", backup_dir, - "-D", node.data_dir - ]) + 'init', + '-B', backup_dir + ] + options, + old_binary=old_binary + ) + + def add_instance(self, backup_dir, instance, node, old_binary=False, options=[]): + + cmd = [ + 'add-instance', + '--instance={0}'.format(instance), + '-B', backup_dir, + '-D', node.data_dir + ] + + # don`t forget to kill old_binary after remote ssh release + if self.remote and not old_binary: + options = options + [ + '--remote-proto=ssh', + '--remote-host=localhost'] + + return self.run_pb(cmd + options, old_binary=old_binary) + + def set_config(self, backup_dir, instance, old_binary=False, options=[]): + + cmd = [ + 'set-config', + '--instance={0}'.format(instance), + '-B', backup_dir, + ] - def del_instance(self, backup_dir, instance): + return self.run_pb(cmd + options, old_binary=old_binary) + + def set_backup(self, backup_dir, instance, backup_id=False, + old_binary=False, options=[]): + + cmd = [ + 'set-backup', + '-B', backup_dir + ] + + if instance: + cmd = cmd + ['--instance={0}'.format(instance)] + + if backup_id: + cmd = cmd + ['-i', backup_id] + + return self.run_pb(cmd + options, old_binary=old_binary) + + def del_instance(self, backup_dir, instance, old_binary=False): return self.run_pb([ - "del-instance", - "--instance={0}".format(instance), - "-B", backup_dir - ]) + 'del-instance', + '--instance={0}'.format(instance), + '-B', backup_dir + ], + old_binary=old_binary + ) def clean_pb(self, backup_dir): shutil.rmtree(backup_dir, ignore_errors=True) def backup_node( self, backup_dir, instance, node, data_dir=False, - backup_type="full", options=[], async=False, gdb=False + backup_type='full', datname=False, options=[], + asynchronous=False, gdb=False, + old_binary=False, return_id=True, no_remote=False ): if not node and not data_dir: print('You must provide ether node or data_dir for backup') exit(1) - if node: - pgdata = node.data_dir - - if data_dir: - pgdata = data_dir + if not datname: + datname = 'postgres' cmd_list = [ - "backup", - "-B", backup_dir, + 'backup', + '-B', backup_dir, + '--instance={0}'.format(instance), # "-D", pgdata, - "-p", "%i" % node.port, - "-d", "postgres", - "--instance={0}".format(instance) + '-p', '%i' % node.port, + '-d', datname ] + + if data_dir: + cmd_list += ['-D', data_dir] + + # don`t forget to kill old_binary after remote ssh release + if self.remote and not old_binary and not no_remote: + options = options + [ + '--remote-proto=ssh', + '--remote-host=localhost'] + if backup_type: - cmd_list += ["-b", backup_type] + cmd_list += ['-b', backup_type] + + if not old_binary: + cmd_list += 
['--no-sync'] - return self.run_pb(cmd_list + options, async, gdb) + return self.run_pb(cmd_list + options, asynchronous, gdb, old_binary, return_id) - def merge_backup(self, backup_dir, instance, backup_id): + def checkdb_node( + self, backup_dir=False, instance=False, data_dir=False, + options=[], asynchronous=False, gdb=False, old_binary=False + ): + + cmd_list = ["checkdb"] + + if backup_dir: + cmd_list += ["-B", backup_dir] + + if instance: + cmd_list += ["--instance={0}".format(instance)] + + if data_dir: + cmd_list += ["-D", data_dir] + + return self.run_pb(cmd_list + options, asynchronous, gdb, old_binary) + + def merge_backup( + self, backup_dir, instance, backup_id, asynchronous=False, + gdb=False, old_binary=False, options=[]): cmd_list = [ - "merge", - "-B", backup_dir, - "--instance={0}".format(instance), - "-i", backup_id + 'merge', + '-B', backup_dir, + '--instance={0}'.format(instance), + '-i', backup_id ] - return self.run_pb(cmd_list) + return self.run_pb(cmd_list + options, asynchronous, gdb, old_binary) def restore_node( self, backup_dir, instance, node=False, - data_dir=None, backup_id=None, options=[] + data_dir=None, backup_id=None, old_binary=False, options=[], + gdb=False ): + if data_dir is None: data_dir = node.data_dir cmd_list = [ - "restore", - "-B", backup_dir, - "-D", data_dir, - "--instance={0}".format(instance) + 'restore', + '-B', backup_dir, + '-D', data_dir, + '--instance={0}'.format(instance) ] + + # don`t forget to kill old_binary after remote ssh release + if self.remote and not old_binary: + options = options + [ + '--remote-proto=ssh', + '--remote-host=localhost'] + if backup_id: - cmd_list += ["-i", backup_id] + cmd_list += ['-i', backup_id] - return self.run_pb(cmd_list + options) + if not old_binary: + cmd_list += ['--no-sync'] + + return self.run_pb(cmd_list + options, gdb=gdb, old_binary=old_binary) def show_pb( self, backup_dir, instance=None, backup_id=None, - options=[], as_text=False, as_json=True + options=[], as_text=False, as_json=True, old_binary=False ): backup_list = [] specific_record = {} cmd_list = [ - "show", - "-B", backup_dir, + 'show', + '-B', backup_dir, ] if instance: - cmd_list += ["--instance={0}".format(instance)] + cmd_list += ['--instance={0}'.format(instance)] if backup_id: - cmd_list += ["-i", backup_id] + cmd_list += ['-i', backup_id] + # AHTUNG, WARNING will break json parsing if as_json: - cmd_list += ["--format=json"] + cmd_list += ['--format=json', '--log-level-console=error'] if as_text: # You should print it when calling as_text=true - return self.run_pb(cmd_list + options) + return self.run_pb(cmd_list + options, old_binary=old_binary) # get show result as list of lines if as_json: - data = json.loads(self.run_pb(cmd_list + options)) + data = json.loads(self.run_pb(cmd_list + options, old_binary=old_binary)) # print(data) for instance_data in data: # find specific instance if requested @@ -682,9 +954,14 @@ def show_pb( return backup else: backup_list.append(backup) + + if backup_id is not None: + self.assertTrue(False, "Failed to find backup with ID: {0}".format(backup_id)) + return backup_list else: - show_splitted = self.run_pb(cmd_list + options).splitlines() + show_splitted = self.run_pb( + cmd_list + options, old_binary=old_binary).splitlines() if instance is not None and backup_id is None: # cut header(ID, Mode, etc) from show as single string header = show_splitted[1:2][0] @@ -694,7 +971,7 @@ def show_pb( # inverse list so oldest record come first body = body[::-1] # split string in list with string for 
every header element - header_split = re.split(" +", header) + header_split = re.split(' +', header) # Remove empty items for i in header_split: if i == '': @@ -706,7 +983,7 @@ def show_pb( for backup_record in body: backup_record = backup_record.rstrip() # split list with str for every backup record element - backup_record_split = re.split(" +", backup_record) + backup_record_split = re.split(' +', backup_record) # Remove empty items for i in backup_record_split: if i == '': @@ -731,142 +1008,345 @@ def show_pb( ] # print sanitized_show for line in sanitized_show: - name, var = line.partition(" = ")[::2] + name, var = line.partition(' = ')[::2] var = var.strip('"') var = var.strip("'") specific_record[name.strip()] = var + + if not specific_record: + self.assertTrue(False, "Failed to find backup with ID: {0}".format(backup_id)) + return specific_record + def show_archive( + self, backup_dir, instance=None, options=[], + as_text=False, as_json=True, old_binary=False, + tli=0 + ): + + cmd_list = [ + 'show', + '--archive', + '-B', backup_dir, + ] + if instance: + cmd_list += ['--instance={0}'.format(instance)] + + # AHTUNG, WARNING will break json parsing + if as_json: + cmd_list += ['--format=json', '--log-level-console=error'] + + if as_text: + # You should print it when calling as_text=true + return self.run_pb(cmd_list + options, old_binary=old_binary) + + if as_json: + if as_text: + data = self.run_pb(cmd_list + options, old_binary=old_binary) + else: + data = json.loads(self.run_pb(cmd_list + options, old_binary=old_binary)) + + if instance: + instance_timelines = None + for instance_name in data: + if instance_name['instance'] == instance: + instance_timelines = instance_name['timelines'] + break + + if tli > 0: + timeline_data = None + for timeline in instance_timelines: + if timeline['tli'] == tli: + return timeline + + return {} + + if instance_timelines: + return instance_timelines + + return data + else: + show_splitted = self.run_pb( + cmd_list + options, old_binary=old_binary).splitlines() + print(show_splitted) + exit(1) + def validate_pb( self, backup_dir, instance=None, - backup_id=None, options=[] + backup_id=None, options=[], old_binary=False, gdb=False ): cmd_list = [ - "validate", - "-B", backup_dir + 'validate', + '-B', backup_dir ] if instance: - cmd_list += ["--instance={0}".format(instance)] + cmd_list += ['--instance={0}'.format(instance)] if backup_id: - cmd_list += ["-i", backup_id] + cmd_list += ['-i', backup_id] - return self.run_pb(cmd_list + options) + return self.run_pb(cmd_list + options, old_binary=old_binary, gdb=gdb) - def delete_pb(self, backup_dir, instance, backup_id=None, options=[]): + def delete_pb( + self, backup_dir, instance, + backup_id=None, options=[], old_binary=False): cmd_list = [ - "delete", - "-B", backup_dir + 'delete', + '-B', backup_dir ] - cmd_list += ["--instance={0}".format(instance)] + cmd_list += ['--instance={0}'.format(instance)] if backup_id: - cmd_list += ["-i", backup_id] + cmd_list += ['-i', backup_id] - return self.run_pb(cmd_list + options) + return self.run_pb(cmd_list + options, old_binary=old_binary) - def delete_expired(self, backup_dir, instance, options=[]): + def delete_expired( + self, backup_dir, instance, options=[], old_binary=False): cmd_list = [ - "delete", "--expired", "--wal", - "-B", backup_dir, - "--instance={0}".format(instance) + 'delete', + '-B', backup_dir, + '--instance={0}'.format(instance) ] - return self.run_pb(cmd_list + options) + return self.run_pb(cmd_list + options, old_binary=old_binary) - 
def show_config(self, backup_dir, instance): + def show_config(self, backup_dir, instance, old_binary=False): out_dict = {} cmd_list = [ - "show-config", - "-B", backup_dir, - "--instance={0}".format(instance) + 'show-config', + '-B', backup_dir, + '--instance={0}'.format(instance) ] - res = self.run_pb(cmd_list).splitlines() + + res = self.run_pb(cmd_list, old_binary=old_binary).splitlines() for line in res: if not line.startswith('#'): - name, var = line.partition(" = ")[::2] + name, var = line.partition(' = ')[::2] out_dict[name] = var return out_dict def get_recovery_conf(self, node): out_dict = {} + + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_conf_path = os.path.join( + node.data_dir, 'probackup_recovery.conf') + else: + recovery_conf_path = os.path.join(node.data_dir, 'recovery.conf') + with open( - os.path.join(node.data_dir, "recovery.conf"), "r" + recovery_conf_path, 'r' ) as recovery_conf: for line in recovery_conf: try: - key, value = line.split("=") + key, value = line.split('=') except: continue out_dict[key.strip()] = value.strip(" '").replace("'\n", "") return out_dict def set_archiving( - self, backup_dir, instance, node, replica=False, overwrite=False): + self, backup_dir, instance, node, replica=False, + overwrite=False, compress=True, old_binary=False, + log_level=False, archive_timeout=False): + # parse postgresql.auto.conf + options = {} if replica: - archive_mode = 'always' - node.append_conf('postgresql.auto.conf', 'hot_standby = on') + options['archive_mode'] = 'always' + options['hot_standby'] = 'on' else: - archive_mode = 'on' - - # node.append_conf( - # "postgresql.auto.conf", - # "wal_level = archive" - # ) - node.append_conf( - "postgresql.auto.conf", - "archive_mode = {0}".format(archive_mode) - ) - archive_command = "{0} archive-push -B {1} --instance={2} ".format( - self.probackup_path, backup_dir, instance) + options['archive_mode'] = 'on' + + if os.name == 'posix': + options['archive_command'] = '"{0}" archive-push -B {1} --instance={2} '.format( + self.probackup_path, backup_dir, instance) + + elif os.name == 'nt': + options['archive_command'] = '"{0}" archive-push -B {1} --instance={2} '.format( + self.probackup_path.replace("\\","\\\\"), + backup_dir.replace("\\","\\\\"), instance) + + # don`t forget to kill old_binary after remote ssh release + if self.remote and not old_binary: + options['archive_command'] += '--remote-proto=ssh ' + options['archive_command'] += '--remote-host=localhost ' + + if self.archive_compress and compress: + options['archive_command'] += '--compress ' + + if overwrite: + options['archive_command'] += '--overwrite ' + + options['archive_command'] += '--log-level-console=VERBOSE ' + options['archive_command'] += '-j 5 ' + options['archive_command'] += '--batch-size 10 ' + options['archive_command'] += '--no-sync ' + + if archive_timeout: + options['archive_command'] += '--archive-timeout={0} '.format( + archive_timeout) if os.name == 'posix': - if self.archive_compress: - archive_command = archive_command + "--compress " + options['archive_command'] += '--wal-file-path=%p --wal-file-name=%f' + + elif os.name == 'nt': + options['archive_command'] += '--wal-file-path="%p" --wal-file-name="%f"' + + if log_level: + options['archive_command'] += ' --log-level-console={0}'.format(log_level) + options['archive_command'] += ' --log-level-file={0} '.format(log_level) + + + self.set_auto_conf(node, options) + + def get_restore_command(self, backup_dir, instance, node): + + # parse postgresql.auto.conf + 
restore_command = '' + if os.name == 'posix': + restore_command += '{0} archive-get -B {1} --instance={2} '.format( + self.probackup_path, backup_dir, instance) + + elif os.name == 'nt': + restore_command += '"{0}" archive-get -B {1} --instance={2} '.format( + self.probackup_path.replace("\\","\\\\"), + backup_dir.replace("\\","\\\\"), instance) + + # don`t forget to kill old_binary after remote ssh release + if self.remote: + restore_command += '--remote-proto=ssh ' + restore_command += '--remote-host=localhost ' + + if os.name == 'posix': + restore_command += '--wal-file-path=%p --wal-file-name=%f' + + elif os.name == 'nt': + restore_command += '--wal-file-path="%p" --wal-file-name="%f"' + + return restore_command - if overwrite: - archive_command = archive_command + "--overwrite " + def set_auto_conf(self, node, options, config='postgresql.auto.conf'): - archive_command = archive_command + "--wal-file-path %p --wal-file-name %f" + # parse postgresql.auto.conf + path = os.path.join(node.data_dir, config) - node.append_conf( - "postgresql.auto.conf", - "archive_command = '{0}'".format( - archive_command)) - # elif os.name == 'nt': - # node.append_conf( - # "postgresql.auto.conf", - # "archive_command = 'copy %p {0}\\%f'".format(archive_dir) - # ) + with open(path, 'r') as f: + raw_content = f.read() + + current_options = {} + current_directives = [] + for line in raw_content.splitlines(): + + # ignore comments + if line.startswith('#'): + continue + + if line == '': + continue + + if line.startswith('include'): + current_directives.append(line) + continue + + name, var = line.partition('=')[::2] + name = name.strip() + var = var.strip() + var = var.strip('"') + var = var.strip("'") + current_options[name] = var + + for option in options: + current_options[option] = options[option] + + auto_conf = '' + for option in current_options: + auto_conf += "{0} = '{1}'\n".format( + option, current_options[option]) + + for directive in current_directives: + auto_conf += directive + "\n" + + with open(path, 'wt') as f: + f.write(auto_conf) + f.flush() + f.close() def set_replica( self, master, replica, replica_name='replica', - synchronous=False + synchronous=False, + log_shipping=False ): - replica.append_conf( - "postgresql.auto.conf", "port = {0}".format(replica.port)) - replica.append_conf('postgresql.auto.conf', 'hot_standby = on') - replica.append_conf('recovery.conf', "standby_mode = 'on'") - replica.append_conf( - "recovery.conf", - "primary_conninfo = 'user={0} port={1} application_name={2}" - " sslmode=prefer sslcompression=1'".format( - self.user, master.port, replica_name) - ) + + self.set_auto_conf( + replica, + options={ + 'port': replica.port, + 'hot_standby': 'on'}) + + if self.get_version(replica) >= self.version_to_num('12.0'): + with open(os.path.join(replica.data_dir, "standby.signal"), 'w') as f: + f.flush() + f.close() + + config = 'postgresql.auto.conf' + probackup_recovery_path = os.path.join(replica.data_dir, 'probackup_recovery.conf') + if os.path.exists(probackup_recovery_path): + if os.stat(probackup_recovery_path).st_size > 0: + config = 'probackup_recovery.conf' + + if not log_shipping: + self.set_auto_conf( + replica, + {'primary_conninfo': 'user={0} port={1} application_name={2} ' + ' sslmode=prefer sslcompression=1'.format( + self.user, master.port, replica_name)}, + config) + else: + replica.append_conf('recovery.conf', 'standby_mode = on') + + if not log_shipping: + replica.append_conf( + 'recovery.conf', + "primary_conninfo = 'user={0} port={1} 
application_name={2}" + " sslmode=prefer sslcompression=1'".format( + self.user, master.port, replica_name)) + if synchronous: - master.append_conf( - "postgresql.auto.conf", - "synchronous_standby_names='{0}'".format(replica_name) - ) - master.append_conf( - 'postgresql.auto.conf', - "synchronous_commit='remote_apply'" - ) + self.set_auto_conf( + master, + options={ + 'synchronous_standby_names': replica_name, + 'synchronous_commit': 'remote_apply'}) + master.reload() + def change_backup_status(self, backup_dir, instance, backup_id, status): + + control_file_path = os.path.join( + backup_dir, 'backups', instance, backup_id, 'backup.control') + + with open(control_file_path, 'r') as f: + actual_control = f.read() + + new_control_file = '' + for line in actual_control.splitlines(): + if line.startswith('status'): + line = 'status = {0}'.format(status) + new_control_file += line + new_control_file += '\n' + + with open(control_file_path, 'wt') as f: + f.write(new_control_file) + f.flush() + f.close() + + with open(control_file_path, 'r') as f: + actual_control = f.read() + def wrong_wal_clean(self, node, wal_size): - wals_dir = os.path.join(self.backup_dir(node), "wal") + wals_dir = os.path.join(self.backup_dir(node), 'wal') wals = [ f for f in os.listdir(wals_dir) if os.path.isfile( os.path.join(wals_dir, f)) @@ -878,39 +1358,39 @@ def wrong_wal_clean(self, node, wal_size): def guc_wal_segment_size(self, node): var = node.execute( - "postgres", + 'postgres', "select setting from pg_settings where name = 'wal_segment_size'" ) return int(var[0][0]) * self.guc_wal_block_size(node) def guc_wal_block_size(self, node): var = node.execute( - "postgres", + 'postgres', "select setting from pg_settings where name = 'wal_block_size'" ) return int(var[0][0]) def get_pgpro_edition(self, node): if node.execute( - "postgres", + 'postgres', "select exists (select 1 from" " pg_proc where proname = 'pgpro_edition')" )[0][0]: - var = node.execute("postgres", "select pgpro_edition()") + var = node.execute('postgres', 'select pgpro_edition()') return str(var[0][0]) else: return False def get_username(self): """ Returns current user name """ - return pwd.getpwuid(os.getuid())[0] + return getpass.getuser() def version_to_num(self, version): if not version: return 0 - parts = version.split(".") + parts = version.split('.') while len(parts) < 3: - parts.append("0") + parts.append('0') num = 0 for part in parts: num = num * 100 + int(re.sub("[^\d]", "", part)) @@ -925,34 +1405,59 @@ def switch_wal_segment(self, node): """ if isinstance(node, testgres.PostgresNode): if self.version_to_num( - node.safe_psql("postgres", "show server_version") + node.safe_psql('postgres', 'show server_version') ) >= self.version_to_num('10.0'): - node.safe_psql("postgres", "select pg_switch_wal()") + node.safe_psql('postgres', 'select pg_switch_wal()') else: - node.safe_psql("postgres", "select pg_switch_xlog()") + node.safe_psql('postgres', 'select pg_switch_xlog()') else: if self.version_to_num( - node.execute("show server_version")[0][0] + node.execute('show server_version')[0][0] ) >= self.version_to_num('10.0'): - node.execute("select pg_switch_wal()") + node.execute('select pg_switch_wal()') else: - node.execute("select pg_switch_xlog()") + node.execute('select pg_switch_xlog()') + sleep(1) + def wait_until_replica_catch_with_master(self, master, replica): + + if self.version_to_num( + master.safe_psql( + 'postgres', + 'show server_version')) >= self.version_to_num('10.0'): + master_function = 'pg_catalog.pg_current_wal_lsn()' + 
replica_function = 'pg_catalog.pg_last_wal_replay_lsn()' + else: + master_function = 'pg_catalog.pg_current_xlog_location()' + replica_function = 'pg_catalog.pg_last_xlog_replay_location()' + + lsn = master.safe_psql( + 'postgres', + 'SELECT {0}'.format(master_function)).rstrip() + + # Wait until replica catch up with master + replica.poll_query_until( + 'postgres', + "SELECT '{0}'::pg_lsn <= {1}".format(lsn, replica_function)) + def get_version(self, node): return self.version_to_num( - testgres.get_pg_config()["VERSION"].split(" ")[1]) + testgres.get_pg_config()['VERSION'].split(" ")[1]) def get_bin_path(self, binary): return testgres.get_bin_path(binary) - def del_test_dir(self, module_name, fname): + def del_test_dir(self, module_name, fname, nodes=[]): """ Del testdir and optimistically try to del module dir""" try: testgres.clean_all() except: pass + for node in nodes: + node.stop() + shutil.rmtree( os.path.join( self.tmp_path, @@ -966,7 +1471,7 @@ def del_test_dir(self, module_name, fname): except: pass - def pgdata_content(self, directory, ignore_ptrack=True): + def pgdata_content(self, pgdata, ignore_ptrack=True, exclude_dirs=None): """ return dict with directory content. " " TAKE IT AFTER CHECKPOINT or BACKUP""" dirs_to_ignore = [ @@ -977,15 +1482,21 @@ def pgdata_content(self, directory, ignore_ptrack=True): 'postmaster.pid', 'postmaster.opts', 'pg_internal.init', 'postgresql.auto.conf', 'backup_label', 'tablespace_map', 'recovery.conf', - 'ptrack_control', 'ptrack_init', 'pg_control' + 'ptrack_control', 'ptrack_init', 'pg_control', + 'probackup_recovery.conf', 'recovery.signal', + 'standby.signal', 'ptrack.map', 'ptrack.map.mmap' ] + + if exclude_dirs: + dirs_to_ignore = dirs_to_ignore + exclude_dirs # suffixes_to_ignore = ( # '_ptrack' # ) directory_dict = {} - directory_dict['pgdata'] = directory + directory_dict['pgdata'] = pgdata directory_dict['files'] = {} - for root, dirs, files in os.walk(directory, followlinks=True): + directory_dict['dirs'] = {} + for root, dirs, files in os.walk(pgdata, followlinks=True): dirs[:] = [d for d in dirs if d not in dirs_to_ignore] for file in files: if ( @@ -995,11 +1506,12 @@ def pgdata_content(self, directory, ignore_ptrack=True): continue file_fullpath = os.path.join(root, file) - file_relpath = os.path.relpath(file_fullpath, directory) + file_relpath = os.path.relpath(file_fullpath, pgdata) directory_dict['files'][file_relpath] = {'is_datafile': False} directory_dict['files'][file_relpath]['md5'] = hashlib.md5( open(file_fullpath, 'rb').read()).hexdigest() + # crappy algorithm if file.isdigit(): directory_dict['files'][file_relpath]['is_datafile'] = True size_in_pages = os.path.getsize(file_fullpath)/8192 @@ -1008,12 +1520,74 @@ def pgdata_content(self, directory, ignore_ptrack=True): file_fullpath, size_in_pages ) + for root, dirs, files in os.walk(pgdata, topdown=False, followlinks=True): + for directory in dirs: + directory_path = os.path.join(root, directory) + directory_relpath = os.path.relpath(directory_path, pgdata) + + found = False + for d in dirs_to_ignore: + if d in directory_relpath: + found = True + break + + # check if directory already here as part of larger directory + if not found: + for d in directory_dict['dirs']: + # print("OLD dir {0}".format(d)) + if directory_relpath in d: + found = True + break + + if not found: + directory_dict['dirs'][directory_relpath] = {} + + # get permissions for every file and directory + for file in directory_dict['dirs']: + full_path = os.path.join(pgdata, file) + 
directory_dict['dirs'][file]['mode'] = os.stat( + full_path).st_mode + + for file in directory_dict['files']: + full_path = os.path.join(pgdata, file) + directory_dict['files'][file]['mode'] = os.stat( + full_path).st_mode + return directory_dict def compare_pgdata(self, original_pgdata, restored_pgdata): """ return dict with directory content. DO IT BEFORE RECOVERY""" fail = False error_message = 'Restored PGDATA is not equal to original!\n' + + # Compare directories + for directory in restored_pgdata['dirs']: + if directory not in original_pgdata['dirs']: + fail = True + error_message += '\nDirectory was not present' + error_message += ' in original PGDATA: {0}\n'.format( + os.path.join(restored_pgdata['pgdata'], directory)) + else: + if ( + restored_pgdata['dirs'][directory]['mode'] != + original_pgdata['dirs'][directory]['mode'] + ): + fail = True + error_message += '\nDir permissions mismatch:\n' + error_message += ' Dir old: {0} Permissions: {1}\n'.format( + os.path.join(original_pgdata['pgdata'], directory), + original_pgdata['dirs'][directory]['mode']) + error_message += ' Dir new: {0} Permissions: {1}\n'.format( + os.path.join(restored_pgdata['pgdata'], directory), + restored_pgdata['dirs'][directory]['mode']) + + for directory in original_pgdata['dirs']: + if directory not in restored_pgdata['dirs']: + fail = True + error_message += '\nDirectory dissappeared' + error_message += ' in restored PGDATA: {0}\n'.format( + os.path.join(restored_pgdata['pgdata'], directory)) + for file in restored_pgdata['files']: # File is present in RESTORED PGDATA # but not present in ORIGINAL @@ -1027,6 +1601,19 @@ def compare_pgdata(self, original_pgdata, restored_pgdata): for file in original_pgdata['files']: if file in restored_pgdata['files']: + if ( + restored_pgdata['files'][file]['mode'] != + original_pgdata['files'][file]['mode'] + ): + fail = True + error_message += '\nFile permissions mismatch:\n' + error_message += ' File_old: {0} Permissions: {1}\n'.format( + os.path.join(original_pgdata['pgdata'], file), + original_pgdata['files'][file]['mode']) + error_message += ' File_new: {0} Permissions: {1}\n'.format( + os.path.join(restored_pgdata['pgdata'], file), + restored_pgdata['files'][file]['mode']) + if ( original_pgdata['files'][file]['md5'] != restored_pgdata['files'][file]['md5'] @@ -1082,38 +1669,13 @@ def compare_pgdata(self, original_pgdata, restored_pgdata): else: error_message += ( - '\nFile dissappearance.\n ' + '\nFile disappearance.\n ' 'File: {0}\n').format( os.path.join(restored_pgdata['pgdata'], file) ) fail = True self.assertFalse(fail, error_message) - def get_async_connect(self, database=None, host=None, port=5432): - if not database: - database = 'postgres' - if not host: - host = '127.0.0.1' - - return psycopg2.connect( - database="postgres", - host='127.0.0.1', - port=port, - async=True - ) - - def wait(self, connection): - while True: - state = connection.poll() - if state == psycopg2.extensions.POLL_OK: - break - elif state == psycopg2.extensions.POLL_WRITE: - select.select([], [connection.fileno()], []) - elif state == psycopg2.extensions.POLL_READ: - select.select([connection.fileno()], [], []) - else: - raise psycopg2.OperationalError("poll() returned %s" % state) - def gdb_attach(self, pid): return GDBobj([str(pid)], self.verbose, attach=True) @@ -1133,7 +1695,7 @@ def __init__(self, cmd, verbose, attach=False): # Check gdb presense try: gdb_version, _ = subprocess.Popen( - ["gdb", "--version"], + ['gdb', '--version'], stdout=subprocess.PIPE ).communicate() except 
OSError: @@ -1184,12 +1746,16 @@ def __init__(self, cmd, verbose, attach=False): break def set_breakpoint(self, location): + result = self._execute('break ' + location) for line in result: if line.startswith('~"Breakpoint'): return - elif line.startswith('^error') or line.startswith('(gdb)'): + elif line.startswith('=breakpoint-created'): + return + + elif line.startswith('^error'): #or line.startswith('(gdb)'): break elif line.startswith('&"break'): @@ -1208,6 +1774,18 @@ def set_breakpoint(self, location): 'Failed to set breakpoint.\n Output:\n {0}'.format(result) ) + def remove_all_breakpoints(self): + + result = self._execute('delete') + for line in result: + + if line.startswith('^done'): + return + + raise GdbException( + 'Failed to remove breakpoints.\n Output:\n {0}'.format(result) + ) + def run_until_break(self): result = self._execute('run', False) for line in result: @@ -1220,18 +1798,17 @@ def run_until_break(self): def continue_execution_until_running(self): result = self._execute('continue') - running = False for line in result: - if line.startswith('*running'): - running = True - break + if line.startswith('*running') or line.startswith('^running'): + return if line.startswith('*stopped,reason="breakpoint-hit"'): - running = False continue if line.startswith('*stopped,reason="exited-normally"'): - running = False continue - return running + + raise GdbException( + 'Failed to continue execution until running.\n' + ) def continue_execution_until_exit(self): result = self._execute('continue', False) @@ -1242,14 +1819,30 @@ def continue_execution_until_exit(self): if line.startswith('*stopped,reason="breakpoint-hit"'): continue if ( - line.startswith('*stopped,reason="exited-normally"') or + line.startswith('*stopped,reason="exited') or line == '*stopped\n' ): return + raise GdbException( 'Failed to continue execution until exit.\n' ) + def continue_execution_until_error(self): + result = self._execute('continue', False) + + for line in result: + if line.startswith('^error'): + return + if line.startswith('*stopped,reason="exited'): + return + if line.startswith( + '*stopped,reason="signal-received",signal-name="SIGABRT"'): + return + + raise GdbException( + 'Failed to continue execution until error.\n') + def continue_execution_until_break(self, ignore_count=0): if ignore_count > 0: result = self._execute( @@ -1259,16 +1852,14 @@ def continue_execution_until_break(self, ignore_count=0): else: result = self._execute('continue', False) - running = False for line in result: - if line.startswith('*running'): - running = True if line.startswith('*stopped,reason="breakpoint-hit"'): - return 'breakpoint-hit' + return if line.startswith('*stopped,reason="exited-normally"'): - return 'exited-normally' - if running: - return 'running' + break + + raise GdbException( + 'Failed to continue execution until break.\n') def stopped_in_breakpoint(self): output = [] @@ -1287,14 +1878,29 @@ def _execute(self, cmd, running=True): self.proc.stdin.flush() self.proc.stdin.write(cmd + '\n') self.proc.stdin.flush() + sleep(1) + + # look for command we just send + while True: + line = self.proc.stdout.readline() + if self.verbose: + print(repr(line)) + + if cmd not in line: + continue + else: + break while True: line = self.proc.stdout.readline() output += [line] if self.verbose: print(repr(line)) - if line == '^done\n' or line.startswith('*stopped'): + if line.startswith('^done') or line.startswith('*stopped'): + break + if line.startswith('^error'): break - if running and line.startswith('*running'): 
+ if running and (line.startswith('*running') or line.startswith('^running')): +# if running and line.startswith('*running'): break return output diff --git a/tests/incr_restore.py b/tests/incr_restore.py new file mode 100644 index 000000000..9caa479c0 --- /dev/null +++ b/tests/incr_restore.py @@ -0,0 +1,1990 @@ +import os +import unittest +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +import subprocess +from datetime import datetime +import sys +from time import sleep +from datetime import datetime, timedelta +import hashlib +import shutil +import json +from testgres import QueryException + + +module_name = 'incr_restore' + + +class IncrRestoreTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + def test_basic_incr_restore(self): + """incremental restore in CHECKSUM mode""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=50) + + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '1']) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + pgbench.stdout.close() + + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgdata = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + pgbench.stdout.close() + + node.stop() + + print(self.restore_node( + backup_dir, 'node', node, + options=["-j", "4", "--incremental-mode=checksum"])) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_checksum_corruption_detection(self): + """recovery to target timeline""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=10) + + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '1']) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', 
node, backup_type='page') + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + pgbench.stdout.close() + + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgdata = self.pgdata_content(node.data_dir) + + node.stop() + + self.restore_node( + backup_dir, 'node', node, options=["-j", "4", "--incremental-mode=lsn"]) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_incr_restore_with_tablespace(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + tblspace = self.get_tblspace_path(node, 'tblspace') + some_directory = self.get_tblspace_path(node, 'some_directory') + + # stuff new destination with garbage + self.restore_node(backup_dir, 'node', node, data_dir=some_directory) + + self.create_tblspace_in_node(node, 'tblspace') + node.pgbench_init(scale=10, tablespace='tblspace') + + self.backup_node(backup_dir, 'node', node, options=['--stream']) + pgdata = self.pgdata_content(node.data_dir) + + node.stop() + + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--incremental-mode=checksum", + "-T{0}={1}".format(tblspace, some_directory)]) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_incr_restore_with_tablespace_1(self): + """recovery to target timeline""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + tblspace = self.get_tblspace_path(node, 'tblspace') + some_directory = self.get_tblspace_path(node, 'some_directory') + + self.restore_node(backup_dir, 'node', node, data_dir=some_directory) + + self.create_tblspace_in_node(node, 'tblspace') + node.pgbench_init(scale=10, tablespace='tblspace') + + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, backup_type='delta', options=['--stream']) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, backup_type='delta', options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + node.stop() + + self.restore_node( + backup_dir, 'node', node, + options=["-j", "4", "--incremental-mode=checksum"]) + + pgdata_restored = 
self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_incr_restore_with_tablespace_2(self): + """ + If "--tablespace-mapping" option is used with incremental restore, + then new directory must be empty. + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + tblspace = self.get_tblspace_path(node, 'tblspace') + self.create_tblspace_in_node(node, 'tblspace') + node.pgbench_init(scale=10, tablespace='tblspace') + + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + node_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_1')) + + node_1.cleanup() + + self.restore_node( + backup_dir, 'node', node, + data_dir=node_1.data_dir, + options=['--incremental-mode=checksum']) + + self.restore_node( + backup_dir, 'node', node, + data_dir=node_1.data_dir, + options=['--incremental-mode=checksum', '-T{0}={1}'.format(tblspace, tblspace)]) + + pgdata_restored = self.pgdata_content(node_1.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_basic_incr_restore_sanity(self): + """recovery to target timeline""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + try: + self.restore_node( + backup_dir, 'node', node, + options=["-j", "4", "--incremental-mode=checksum"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because there is running postmaster " + "process in destination directory.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Postmaster with pid', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'ERROR: Incremental restore is impossible', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + node_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_1')) + + try: + self.restore_node( + backup_dir, 'node', node_1, data_dir=node_1.data_dir, + options=["-j", "4", "--incremental-mode=checksum"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because destination directory has wrong system id.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Backup catalog was initialized for system id', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'ERROR: Incremental restore is impossible', + 
e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.skip("skip") + def test_incr_checksum_restore(self): + """ + /----C-----D + ------A----B---*--------X + + X - is instance, we want to return it to C state. + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off', 'wal_log_hints': 'on'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=50) + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + xid = node.safe_psql( + 'postgres', + 'select txid_current()').rstrip() + + # --A-----B--------X + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + node.stop(['-m', 'immediate', '-D', node.data_dir]) + + node_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_1')) + node_1.cleanup() + + self.restore_node( + backup_dir, 'node', node_1, data_dir=node_1.data_dir, + options=[ + '--recovery-target-action=promote', + '--recovery-target-xid={0}'.format(xid)]) + + self.set_auto_conf(node_1, {'port': node_1.port}) + node_1.slow_start() + + # /-- + # --A-----B----*----X + pgbench = node_1.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # /--C + # --A-----B----*----X + self.backup_node(backup_dir, 'node', node_1, + data_dir=node_1.data_dir, backup_type='page') + + # /--C------ + # --A-----B----*----X + pgbench = node_1.pgbench(options=['-T', '50', '-c', '1']) + pgbench.wait() + + # /--C------D + # --A-----B----*----X + self.backup_node(backup_dir, 'node', node_1, + data_dir=node_1.data_dir, backup_type='page') + + pgdata = self.pgdata_content(node_1.data_dir) + + print(self.restore_node( + backup_dir, 'node', node, + options=["-j", "4", "--incremental-mode=checksum"])) + + pgdata_restored = self.pgdata_content(node.data_dir) + + self.set_auto_conf(node, {'port': node.port}) + node.slow_start() + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node, node_1]) + + + # @unittest.skip("skip") + def test_incr_lsn_restore(self): + """ + /----C-----D + ------A----B---*--------X + + X - is instance, we want to return it to C state. 
+ """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off', 'wal_log_hints': 'on'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=50) + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + xid = node.safe_psql( + 'postgres', + 'select txid_current()').rstrip() + + # --A-----B--------X + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + node.stop(['-m', 'immediate', '-D', node.data_dir]) + + node_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_1')) + node_1.cleanup() + + self.restore_node( + backup_dir, 'node', node_1, data_dir=node_1.data_dir, + options=[ + '--recovery-target-action=promote', + '--recovery-target-xid={0}'.format(xid)]) + + self.set_auto_conf(node_1, {'port': node_1.port}) + node_1.slow_start() + + # /-- + # --A-----B----*----X + pgbench = node_1.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # /--C + # --A-----B----*----X + self.backup_node(backup_dir, 'node', node_1, + data_dir=node_1.data_dir, backup_type='page') + + # /--C------ + # --A-----B----*----X + pgbench = node_1.pgbench(options=['-T', '50', '-c', '1']) + pgbench.wait() + + # /--C------D + # --A-----B----*----X + self.backup_node(backup_dir, 'node', node_1, + data_dir=node_1.data_dir, backup_type='page') + + pgdata = self.pgdata_content(node_1.data_dir) + + print(self.restore_node( + backup_dir, 'node', node, options=["-j", "4", "--incremental-mode=lsn"])) + + pgdata_restored = self.pgdata_content(node.data_dir) + + self.set_auto_conf(node, {'port': node.port}) + node.slow_start() + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node, node_1]) + + # @unittest.skip("skip") + def test_incr_lsn_sanity(self): + """ + /----A-----B + F------*--------X + + X - is instance, we want to return it to state B. + fail is expected behaviour in case of lsn restore. 
+ """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off', 'wal_log_hints': 'on'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=10) + + node_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_1')) + node_1.cleanup() + + self.restore_node( + backup_dir, 'node', node_1, data_dir=node_1.data_dir) + + self.set_auto_conf(node_1, {'port': node_1.port}) + node_1.slow_start() + + pgbench = node_1.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + self.backup_node(backup_dir, 'node', node_1, + data_dir=node_1.data_dir, backup_type='full') + + pgbench = node_1.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + page_id = self.backup_node(backup_dir, 'node', node_1, + data_dir=node_1.data_dir, backup_type='page') + + node.stop() + + try: + self.restore_node( + backup_dir, 'node', node, data_dir=node.data_dir, + options=["-j", "4", "--incremental-mode=lsn"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because incremental restore in lsn mode is impossible\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Cannot perform incremental restore of " + "backup chain {0} in 'lsn' mode".format(page_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node_1]) + + # @unittest.skip("skip") + def test_incr_checksum_sanity(self): + """ + /----A-----B + F------*--------X + + X - is instance, we want to return it to state B. 
+ """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=20) + + node_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_1')) + node_1.cleanup() + + self.restore_node( + backup_dir, 'node', node_1, data_dir=node_1.data_dir) + + self.set_auto_conf(node_1, {'port': node_1.port}) + node_1.slow_start() + + pgbench = node_1.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + self.backup_node(backup_dir, 'node', node_1, + data_dir=node_1.data_dir, backup_type='full') + + pgbench = node_1.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + page_id = self.backup_node(backup_dir, 'node', node_1, + data_dir=node_1.data_dir, backup_type='page') + pgdata = self.pgdata_content(node_1.data_dir) + + node.stop() + + self.restore_node( + backup_dir, 'node', node, data_dir=node.data_dir, + options=["-j", "4", "--incremental-mode=checksum"]) + + pgdata_restored = self.pgdata_content(node.data_dir) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node_1]) + + + # @unittest.skip("skip") + def test_incr_checksum_corruption_detection(self): + """ + check that corrupted page got detected and replaced + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), +# initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off', 'wal_log_hints': 'on'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=20) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + self.backup_node(backup_dir, 'node', node, + data_dir=node.data_dir, backup_type='full') + + heap_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('pgbench_accounts')").rstrip() + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + page_id = self.backup_node(backup_dir, 'node', node, + data_dir=node.data_dir, backup_type='page') + + pgdata = self.pgdata_content(node.data_dir) + + node.stop() + + path = os.path.join(node.data_dir, heap_path) + with open(path, "rb+", 0) as f: + f.seek(22000) + f.write(b"bla") + f.flush() + f.close + + print(self.restore_node( + backup_dir, 'node', node, data_dir=node.data_dir, + options=["-j", "4", "--incremental-mode=checksum"])) + + pgdata_restored = self.pgdata_content(node.data_dir) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_incr_lsn_corruption_detection(self): + """ + check that corrupted page got detected and replaced + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off', 'wal_log_hints': 'on'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 
'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=20) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + self.backup_node(backup_dir, 'node', node, + data_dir=node.data_dir, backup_type='full') + + heap_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('pgbench_accounts')").rstrip() + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + page_id = self.backup_node(backup_dir, 'node', node, + data_dir=node.data_dir, backup_type='page') + + pgdata = self.pgdata_content(node.data_dir) + + node.stop() + + path = os.path.join(node.data_dir, heap_path) + with open(path, "rb+", 0) as f: + f.seek(22000) + f.write(b"bla") + f.flush() + f.close + + self.restore_node( + backup_dir, 'node', node, data_dir=node.data_dir, + options=["-j", "4", "--incremental-mode=lsn"]) + + pgdata_restored = self.pgdata_content(node.data_dir) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_incr_restore_multiple_external(self): + """check that cmdline has priority over config""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # FULL backup + node.pgbench_init(scale=20) + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4"]) + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.set_config( + backup_dir, 'node', + options=['-E{0}{1}{2}'.format( + external_dir1, self.EXTERNAL_DIRECTORY_DELIMITER, external_dir2)]) + + # cmdline option MUST override options in config + self.backup_node( + backup_dir, 'node', node, + backup_type='full', options=["-j", "4"]) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # cmdline option MUST override options in config + self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=["-j", "4"]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + node.stop() + + print(self.restore_node( + backup_dir, 'node', node, + options=["-j", "4", '--incremental-mode=checksum', '--log-level-console=VERBOSE'])) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_incr_lsn_restore_multiple_external(self): + """check that cmdline has priority over config""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + 
base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + # FULL backup + node.pgbench_init(scale=20) + self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4"]) + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.set_config( + backup_dir, 'node', + options=['-E{0}{1}{2}'.format( + external_dir1, self.EXTERNAL_DIRECTORY_DELIMITER, external_dir2)]) + + # cmdline option MUST override options in config + self.backup_node( + backup_dir, 'node', node, + backup_type='full', options=["-j", "4"]) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # cmdline option MUST override options in config + self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=["-j", "4"]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + node.stop() + + print(self.restore_node( + backup_dir, 'node', node, + options=["-j", "4", '--incremental-mode=lsn'])) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_incr_lsn_restore_backward(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off', 'wal_log_hints': 'on', 'hot_standby': 'on'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + node.pgbench_init(scale=2) + full_id = self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4"]) + + full_pgdata = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + page_id = self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=["-j", "4"]) + + page_pgdata = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + delta_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=["-j", "4"]) + + delta_pgdata = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + node.stop() + + print(self.restore_node( + backup_dir, 'node', node, backup_id=full_id, + options=[ + "-j", "4", '--incremental-mode=lsn', '--log-level-file=VERBOSE', + '--recovery-target=immediate', '--recovery-target-action=pause'])) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(full_pgdata, pgdata_restored) + + 
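Judging by the assertions in this file, the backward-restore tests exercise the two incremental modes against the same backup chain: checksum mode rehashes the pages already present in PGDATA and replaces the ones that no longer match (which is why it also survives the deliberate page corruption and the timeline divergence tested earlier), while lsn mode trusts page LSNs, insists on data checksums in the destination, and rejects some chains outright with "Cannot perform incremental restore of backup chain ... in 'lsn' mode". For reference, a hypothetical sketch of the CLI that the restore_node() calls here roughly wrap; the catalog and data-directory paths are invented:

    # Hypothetical equivalent of the restore_node() calls in these tests.
    import subprocess

    cmd = [
        'pg_probackup', 'restore',
        '-B', '/tmp/backup_catalog',          # invented backup catalog path
        '--instance=node',
        '-D', '/tmp/node/data',               # invented destination PGDATA
        '-j', '4',
        '--incremental-mode=lsn',             # or --incremental-mode=checksum / -I checksum
        '--recovery-target=immediate',
        '--recovery-target-action=pause',
    ]
    print(' '.join(cmd))
    # subprocess.check_call(cmd)  # would run it against a stopped cluster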
node.slow_start(replica=True) + node.stop() + + try: + self.restore_node( + backup_dir, 'node', node, backup_id=page_id, + options=[ + "-j", "4", '--incremental-mode=lsn', + '--recovery-target=immediate', '--recovery-target-action=pause']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because incremental restore in lsn mode is impossible\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "Cannot perform incremental restore of backup chain", + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.restore_node( + backup_dir, 'node', node, backup_id=page_id, + options=[ + "-j", "4", '--incremental-mode=checksum', + '--recovery-target=immediate', '--recovery-target-action=pause']) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(page_pgdata, pgdata_restored) + + node.slow_start(replica=True) + node.stop() + + print(self.restore_node( + backup_dir, 'node', node, backup_id=delta_id, + options=[ + "-j", "4", '--incremental-mode=lsn', + '--recovery-target=immediate', '--recovery-target-action=pause'])) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(delta_pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_incr_checksum_restore_backward(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'hot_standby': 'on'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + node.pgbench_init(scale=20) + full_id = self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4"]) + + full_pgdata = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + page_id = self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=["-j", "4"]) + + page_pgdata = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + delta_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=["-j", "4"]) + + delta_pgdata = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + node.stop() + + print(self.restore_node( + backup_dir, 'node', node, backup_id=full_id, + options=[ + "-j", "4", '--incremental-mode=checksum', + '--recovery-target=immediate', '--recovery-target-action=pause'])) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(full_pgdata, pgdata_restored) + + node.slow_start(replica=True) + node.stop() + + print(self.restore_node( + backup_dir, 'node', node, backup_id=page_id, + options=[ + "-j", "4", '--incremental-mode=checksum', + '--recovery-target=immediate', '--recovery-target-action=pause'])) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(page_pgdata, pgdata_restored) + + node.slow_start(replica=True) + node.stop() + + print(self.restore_node( + backup_dir, 'node', node, backup_id=delta_id, + 
options=[ + "-j", "4", '--incremental-mode=checksum', + '--recovery-target=immediate', '--recovery-target-action=pause'])) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(delta_pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_make_replica_via_incr_checksum_restore(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums']) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', master) + self.set_archiving(backup_dir, 'node', master, replica=True) + master.slow_start() + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + master.pgbench_init(scale=20) + + self.backup_node(backup_dir, 'node', master) + + self.restore_node( + backup_dir, 'node', replica, options=['-R']) + + # Settings for Replica + self.set_replica(master, replica, synchronous=False) + + replica.slow_start(replica=True) + + pgbench = master.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # PROMOTIONS + replica.promote() + new_master = replica + + # old master is going a bit further + old_master = master + pgbench = old_master.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + old_master.stop() + + pgbench = new_master.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # take backup from new master + self.backup_node( + backup_dir, 'node', new_master, + data_dir=new_master.data_dir, backup_type='page') + + # restore old master as replica + print(self.restore_node( + backup_dir, 'node', old_master, data_dir=old_master.data_dir, + options=['-R', '--incremental-mode=checksum'])) + + self.set_replica(new_master, old_master, synchronous=True) + + old_master.slow_start(replica=True) + + pgbench = new_master.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # Clean after yourself + self.del_test_dir(module_name, fname, [new_master, old_master]) + + # @unittest.skip("skip") + def test_make_replica_via_incr_lsn_restore(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums']) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', master) + self.set_archiving(backup_dir, 'node', master, replica=True) + master.slow_start() + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + master.pgbench_init(scale=20) + + self.backup_node(backup_dir, 'node', master) + + self.restore_node( + backup_dir, 'node', replica, options=['-R']) + + # Settings for Replica + self.set_replica(master, replica, synchronous=False) + + replica.slow_start(replica=True) + + pgbench = master.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + 
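The promotion-and-rejoin sequence below finishes by calling set_replica() on the old master. On PostgreSQL 12 and later that helper (see the ptrack_helpers.py hunk above) amounts to roughly the following sketch; the directory and connection values are hypothetical, and the real helper merges settings via set_auto_conf() (possibly into probackup_recovery.conf) instead of plain appending:

    import os

    def make_standby_config(replica_data_dir, master_port, user, app_name):
        """Minimal sketch of what set_replica() boils down to on PostgreSQL >= 12."""
        # An empty standby.signal turns the server into a standby at startup.
        open(os.path.join(replica_data_dir, 'standby.signal'), 'w').close()

        # Simplified: append primary_conninfo instead of merging it like set_auto_conf().
        conninfo = 'user={0} port={1} application_name={2} sslmode=prefer'.format(
            user, master_port, app_name)
        with open(os.path.join(replica_data_dir, 'postgresql.auto.conf'), 'a') as f:
            f.write("primary_conninfo = '{0}'\n".format(conninfo))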
+ # PROMOTIONS + replica.promote() + new_master = replica + + # old master is going a bit further + old_master = master + pgbench = old_master.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + old_master.stop() + + pgbench = new_master.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # take backup from new master + self.backup_node( + backup_dir, 'node', new_master, + data_dir=new_master.data_dir, backup_type='page') + + # restore old master as replica + print(self.restore_node( + backup_dir, 'node', old_master, data_dir=old_master.data_dir, + options=['-R', '--incremental-mode=lsn'])) + + self.set_replica(new_master, old_master, synchronous=True) + + old_master.slow_start(replica=True) + + pgbench = new_master.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # Clean after yourself + self.del_test_dir(module_name, fname, [new_master, old_master]) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_incr_checksum_long_xact(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, +# initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + 'postgres', + 'create extension pageinspect') + + # FULL backup + con = node.connect("postgres") + con.execute("CREATE TABLE t1 (a int)") + con.commit() + + + con.execute("INSERT INTO t1 values (1)") + con.commit() + + # leave uncommited + con2 = node.connect("postgres") + con.execute("INSERT INTO t1 values (2)") + con2.execute("INSERT INTO t1 values (3)") + + full_id = self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", "--stream"]) + + self.backup_node( + backup_dir, 'node', node, + backup_type="delta", options=["-j", "4", "--stream"]) + + con.commit() + + node.safe_psql( + 'postgres', + 'select * from t1') + + con2.commit() + node.safe_psql( + 'postgres', + 'select * from t1') + + node.stop() + + self.restore_node( + backup_dir, 'node', node, backup_id=full_id, + options=["-j", "4", '--incremental-mode=checksum']) + + node.slow_start() + + self.assertEqual( + node.safe_psql( + 'postgres', + 'select count(*) from t1').rstrip(), + '1') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_incr_lsn_long_xact_1(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, +# initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + 'postgres', + 'create extension pageinspect') + + # FULL backup + con = node.connect("postgres") + con.execute("CREATE TABLE t1 (a int)") + con.commit() + + + con.execute("INSERT INTO t1 values (1)") + con.commit() + + # leave uncommited + con2 = node.connect("postgres") + con.execute("INSERT INTO t1 values (2)") + con2.execute("INSERT INTO t1 values (3)") + + full_id = self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", 
"--stream"]) + + self.backup_node( + backup_dir, 'node', node, + backup_type="delta", options=["-j", "4", "--stream"]) + + con.commit() + + # when does LSN gets stamped when checksum gets updated ? + node.safe_psql( + 'postgres', + 'select * from t1') + + con2.commit() + node.safe_psql( + 'postgres', + 'select * from t1') + + node.stop() + + try: + print(self.restore_node( + backup_dir, 'node', node, backup_id=full_id, + options=["-j", "4", '--incremental-mode=lsn'])) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because incremental restore in lsn mode is impossible\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Incremental restore in 'lsn' mode require data_checksums to be " + "enabled in destination data directory", + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_incr_lsn_long_xact_2(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'full_page_writes': 'off', + 'wal_log_hints': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + 'postgres', + 'create extension pageinspect') + + # FULL backup + con = node.connect("postgres") + con.execute("CREATE TABLE t1 (a int)") + con.commit() + + + con.execute("INSERT INTO t1 values (1)") + con.commit() + + # leave uncommited + con2 = node.connect("postgres") + con.execute("INSERT INTO t1 values (2)") + con2.execute("INSERT INTO t1 values (3)") + + full_id = self.backup_node( + backup_dir, 'node', node, + backup_type="full", options=["-j", "4", "--stream"]) + + self.backup_node( + backup_dir, 'node', node, + backup_type="delta", options=["-j", "4", "--stream"]) + +# print(node.safe_psql( +# 'postgres', +# "select * from page_header(get_raw_page('t1', 0))")) + + con.commit() + + # when does LSN gets stamped when checksum gets updated ? 
+ node.safe_psql( + 'postgres', + 'select * from t1') + +# print(node.safe_psql( +# 'postgres', +# "select * from page_header(get_raw_page('t1', 0))")) + + con2.commit() + node.safe_psql( + 'postgres', + 'select * from t1') + +# print(node.safe_psql( +# 'postgres', +# "select * from page_header(get_raw_page('t1', 0))")) + + node.stop() + + self.restore_node( + backup_dir, 'node', node, backup_id=full_id, + options=["-j", "4", '--incremental-mode=lsn']) + + node.slow_start() + + self.assertEqual( + node.safe_psql( + 'postgres', + 'select count(*) from t1').rstrip(), + '1') + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_incr_restore_zero_size_file_checksum(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + fullpath = os.path.join(node.data_dir, 'simple_file') + with open(fullpath, "w", 0) as f: + f.flush() + f.close + + # FULL backup + id1 = self.backup_node( + backup_dir, 'node', node, + options=["-j", "4", "--stream"]) + + pgdata1 = self.pgdata_content(node.data_dir) + + with open(fullpath, "rb+", 0) as f: + f.seek(9000) + f.write(b"bla") + f.flush() + f.close + + id2 = self.backup_node( + backup_dir, 'node', node, + backup_type="delta", options=["-j", "4", "--stream"]) + pgdata2 = self.pgdata_content(node.data_dir) + + with open(fullpath, "w") as f: + f.close() + + id3 = self.backup_node( + backup_dir, 'node', node, + backup_type="delta", options=["-j", "4", "--stream"]) + pgdata3 = self.pgdata_content(node.data_dir) + + node.stop() + + print(self.restore_node( + backup_dir, 'node', node, backup_id=id1, + options=["-j", "4", '-I', 'checksum'])) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata1, pgdata_restored) + + self.restore_node( + backup_dir, 'node', node, backup_id=id2, + options=["-j", "4", '-I', 'checksum']) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata2, pgdata_restored) + + self.restore_node( + backup_dir, 'node', node, backup_id=id3, + options=["-j", "4", '-I', 'checksum']) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata3, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_incr_restore_zero_size_file_lsn(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + fullpath = os.path.join(node.data_dir, 'simple_file') + with open(fullpath, "w", 0) as f: + f.flush() + f.close + + # FULL backup + id1 = self.backup_node( + backup_dir, 'node', node, + options=["-j", "4", "--stream"]) + + pgdata1 = self.pgdata_content(node.data_dir) + + with open(fullpath, "rb+", 0) as f: + f.seek(9000) + f.write(b"bla") + f.flush() + f.close + + id2 = self.backup_node( + backup_dir, 'node', node, + 
backup_type="delta", options=["-j", "4", "--stream"]) + pgdata2 = self.pgdata_content(node.data_dir) + + with open(fullpath, "w") as f: + f.close() + + id3 = self.backup_node( + backup_dir, 'node', node, + backup_type="delta", options=["-j", "4", "--stream"]) + pgdata3 = self.pgdata_content(node.data_dir) + + node.stop() + + print(self.restore_node( + backup_dir, 'node', node, backup_id=id1, + options=["-j", "4", '-I', 'checksum'])) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata1, pgdata_restored) + + node.slow_start() + node.stop() + + self.restore_node( + backup_dir, 'node', node, backup_id=id2, + options=["-j", "4", '-I', 'checksum']) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata2, pgdata_restored) + + node.slow_start() + node.stop() + + self.restore_node( + backup_dir, 'node', node, backup_id=id3, + options=["-j", "4", '-I', 'checksum']) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata3, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_incremental_partial_restore_exclude_checksum(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + db_list_raw = node.safe_psql( + 'postgres', + 'SELECT to_json(a) ' + 'FROM (SELECT oid, datname FROM pg_database) a').rstrip() + + db_list_splitted = db_list_raw.splitlines() + + db_list = {} + for line in db_list_splitted: + line = json.loads(line) + db_list[line['datname']] = line['oid'] + + node.pgbench_init(scale=20) + + # FULL backup + self.backup_node(backup_dir, 'node', node) + pgdata = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # PAGE backup + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # restore FULL backup into second node2 + node1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node1')) + node1.cleanup() + + node2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node2')) + node2.cleanup() + + # restore some data into node2 + self.restore_node(backup_dir, 'node', node2) + + # partial restore backup into node1 + self.restore_node( + backup_dir, 'node', + node1, options=[ + "--db-exclude=db1", + "--db-exclude=db5"]) + + pgdata1 = self.pgdata_content(node1.data_dir) + + # partial incremental restore backup into node2 + print(self.restore_node( + backup_dir, 'node', + node2, options=[ + "--db-exclude=db1", + "--db-exclude=db5", + "-I", "checksum"])) + + pgdata2 = self.pgdata_content(node2.data_dir) + + self.compare_pgdata(pgdata1, pgdata2) + + self.set_auto_conf(node2, {'port': node2.port}) + + node2.slow_start() + + node2.safe_psql( + 'postgres', + 'select 1') + + try: + node2.safe_psql( + 'db1', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + try: + node2.safe_psql( + 'db5', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + with open(node2.pg_log_file, 'r') as f: + output = f.read() + + self.assertNotIn('PANIC', output) + + # Clean after 
yourself + self.del_test_dir(module_name, fname, [node, node2]) + + def test_incremental_partial_restore_exclude_lsn(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + db_list_raw = node.safe_psql( + 'postgres', + 'SELECT to_json(a) ' + 'FROM (SELECT oid, datname FROM pg_database) a').rstrip() + + db_list_splitted = db_list_raw.splitlines() + + db_list = {} + for line in db_list_splitted: + line = json.loads(line) + db_list[line['datname']] = line['oid'] + + node.pgbench_init(scale=20) + + # FULL backup + self.backup_node(backup_dir, 'node', node) + pgdata = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1']) + pgbench.wait() + + # PAGE backup + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + + node.stop() + + # restore FULL backup into second node2 + node1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node1')) + node1.cleanup() + + node2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node2')) + node2.cleanup() + + # restore some data into node2 + self.restore_node(backup_dir, 'node', node2) + + # partial restore backup into node1 + self.restore_node( + backup_dir, 'node', + node1, options=[ + "--db-exclude=db1", + "--db-exclude=db5"]) + + pgdata1 = self.pgdata_content(node1.data_dir) + + # partial incremental restore backup into node2 + node2.port = node.port + node2.slow_start() + node2.stop() + print(self.restore_node( + backup_dir, 'node', + node2, options=[ + "--db-exclude=db1", + "--db-exclude=db5", + "-I", "lsn"])) + + pgdata2 = self.pgdata_content(node2.data_dir) + + self.compare_pgdata(pgdata1, pgdata2) + + self.set_auto_conf(node2, {'port': node2.port}) + + node2.slow_start() + + node2.safe_psql( + 'postgres', + 'select 1') + + try: + node2.safe_psql( + 'db1', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + try: + node2.safe_psql( + 'db5', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + with open(node2.pg_log_file, 'r') as f: + output = f.read() + + self.assertNotIn('PANIC', output) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node2]) + + def test_incremental_partial_restore_exclude_tablespace_checksum(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # cat_version = node.get_control_data()["Catalog version number"] + # version_specific_dir = 'PG_' + node.major_version_str + '_' + cat_version + + # PG_10_201707211 + # pg_tblspc/33172/PG_9.5_201510051/16386/ + + self.create_tblspace_in_node(node, 'somedata') + + node_tablespace = self.get_tblspace_path(node, 'somedata') + + tbl_oid = node.safe_psql( + 'postgres', + "SELECT oid " + "FROM pg_tablespace " + "WHERE spcname = 'somedata'").rstrip() + + for i 
in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0} tablespace somedata'.format(i)) + + db_list_raw = node.safe_psql( + 'postgres', + 'SELECT to_json(a) ' + 'FROM (SELECT oid, datname FROM pg_database) a').rstrip() + + db_list_splitted = db_list_raw.splitlines() + + db_list = {} + for line in db_list_splitted: + line = json.loads(line) + db_list[line['datname']] = line['oid'] + + # FULL backup + backup_id = self.backup_node(backup_dir, 'node', node) + + # node1 + node1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node1')) + node1.cleanup() + node1_tablespace = self.get_tblspace_path(node1, 'somedata') + + # node2 + node2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node2')) + node2.cleanup() + node2_tablespace = self.get_tblspace_path(node2, 'somedata') + + # in node2 restore full backup + self.restore_node( + backup_dir, 'node', + node2, options=[ + "-T", "{0}={1}".format( + node_tablespace, node2_tablespace)]) + + # partial restore into node1 + self.restore_node( + backup_dir, 'node', + node1, options=[ + "--db-exclude=db1", + "--db-exclude=db5", + "-T", "{0}={1}".format( + node_tablespace, node1_tablespace)]) + +# with open(os.path.join(node1_tablespace, "hello"), "w") as f: +# f.close() + pgdata1 = self.pgdata_content(node1.data_dir) + + # partial incremental restore into node2 + self.restore_node( + backup_dir, 'node', + node2, options=[ + "-I", "checksum", + "--db-exclude=db1", + "--db-exclude=db5", + "-T", "{0}={1}".format( + node_tablespace, node2_tablespace)]) + pgdata2 = self.pgdata_content(node2.data_dir) + + self.compare_pgdata(pgdata1, pgdata2) + + + self.set_auto_conf(node2, {'port': node2.port}) + node2.slow_start() + + node2.safe_psql( + 'postgres', + 'select 1') + + try: + node2.safe_psql( + 'db1', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + try: + node2.safe_psql( + 'db5', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + with open(node2.pg_log_file, 'r') as f: + output = f.read() + + self.assertNotIn('PANIC', output) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node2]) + +# check that MinRecPoint and BackupStartLsn are correctly used in case of --incrementa-lsn +# incremental restore + partial restore. 
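+# A rough sketch of such a test is outlined below. It is illustrative only and
+# not part of this patch; the test name is made up, but the helpers are the
+# same ones exercised by the tests above:
+#
+#    def test_incr_lsn_restore_after_partial_restore(self):
+#        fname = self.id().split('.')[3]
+#        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+#        node = self.make_simple_node(
+#            base_dir=os.path.join(module_name, fname, 'node'),
+#            initdb_params=['--data-checksums'])
+#
+#        self.init_pb(backup_dir)
+#        self.add_instance(backup_dir, 'node', node)
+#        self.set_archiving(backup_dir, 'node', node)
+#        node.slow_start()
+#
+#        node.safe_psql('postgres', 'CREATE DATABASE db1')
+#        self.backup_node(backup_dir, 'node', node)
+#
+#        node.pgbench_init(scale=10)
+#        self.backup_node(backup_dir, 'node', node, backup_type='delta')
+#
+#        # partial restore combined with an incremental lsn restore onto the
+#        # stopped cluster; MinRecPoint from pg_control and BackupStartLsn
+#        # should be compared here
+#        node.stop()
+#        self.restore_node(
+#            backup_dir, 'node', node,
+#            options=["--db-exclude=db1", "-j", "4", "-I", "lsn"])
+#
+#        node.slow_start()
+#        node.safe_psql('postgres', 'select 1')
+#
+#        self.del_test_dir(module_name, fname)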
diff --git a/tests/init_test.py b/tests/init.py similarity index 57% rename from tests/init_test.py rename to tests/init.py index 0b91dafa7..f5715d249 100644 --- a/tests/init_test.py +++ b/tests/init.py @@ -1,6 +1,7 @@ import os import unittest from .helpers.ptrack_helpers import dir_files, ProbackupTest, ProbackupException +import shutil module_name = 'init' @@ -14,14 +15,16 @@ def test_success(self): """Success normal init""" fname = self.id().split(".")[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname)) + node = self.make_simple_node(base_dir=os.path.join(module_name, fname, 'node')) self.init_pb(backup_dir) self.assertEqual( dir_files(backup_dir), ['backups', 'wal'] ) self.add_instance(backup_dir, 'node', node) - self.assertEqual("INFO: Instance 'node' successfully deleted\n", self.del_instance(backup_dir, 'node'), + self.assertIn( + "INFO: Instance 'node' successfully deleted", + self.del_instance(backup_dir, 'node'), '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) # Show non-existing instance @@ -30,8 +33,9 @@ def test_success(self): self.assertEqual(1, 0, 'Expecting Error due to show of non-existing instance. Output: {0} \n CMD: {1}'.format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - "ERROR: Instance 'node' does not exist in this backup catalog\n", + self.assertIn( + "ERROR: Instance 'node' does not exist in this backup catalog", + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(e.message, self.cmd)) # Delete non-existing instance @@ -40,8 +44,9 @@ def test_success(self): self.assertEqual(1, 0, 'Expecting Error due to delete of non-existing instance. Output: {0} \n CMD: {1}'.format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - "ERROR: Instance 'node1' does not exist in this backup catalog\n", + self.assertIn( + "ERROR: Instance 'node1' does not exist in this backup catalog", + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(e.message, self.cmd)) # Add instance without pgdata @@ -54,8 +59,9 @@ def test_success(self): self.assertEqual(1, 0, 'Expecting Error due to adding instance without pgdata. Output: {0} \n CMD: {1}'.format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - "ERROR: Required parameter not specified: PGDATA (-D, --pgdata)\n", + self.assertIn( + "ERROR: Required parameter not specified: PGDATA (-D, --pgdata)", + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(e.message, self.cmd)) # Clean after yourself @@ -66,15 +72,16 @@ def test_already_exist(self): """Failure with backup catalog already existed""" fname = self.id().split(".")[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname)) + node = self.make_simple_node(base_dir=os.path.join(module_name, fname, 'node')) self.init_pb(backup_dir) try: self.show_pb(backup_dir, 'node') self.assertEqual(1, 0, 'Expecting Error due to initialization in non-empty directory. 
Output: {0} \n CMD: {1}'.format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - "ERROR: Instance 'node' does not exist in this backup catalog\n", + self.assertIn( + "ERROR: Instance 'node' does not exist in this backup catalog", + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) # Clean after yourself @@ -85,15 +92,66 @@ def test_abs_path(self): """failure with backup catalog should be given as absolute path""" fname = self.id().split(".")[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname)) + node = self.make_simple_node(base_dir=os.path.join(module_name, fname, 'node')) try: self.run_pb(["init", "-B", os.path.relpath("%s/backup" % node.base_dir, self.dir_path)]) self.assertEqual(1, 0, 'Expecting Error due to initialization with non-absolute path in --backup-path. Output: {0} \n CMD: {1}'.format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - "ERROR: -B, --backup-path must be an absolute path\n", + self.assertIn( + "ERROR: -B, --backup-path must be an absolute path", + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) # Clean after yourself self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_add_instance_idempotence(self): + """ + https://github.com/postgrespro/pg_probackup/issues/219 + """ + fname = self.id().split(".")[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node(base_dir=os.path.join(module_name, fname, 'node')) + self.init_pb(backup_dir) + + self.add_instance(backup_dir, 'node', node) + shutil.rmtree(os.path.join(backup_dir, 'backups', 'node')) + + dir_backups = os.path.join(backup_dir, 'backups', 'node') + dir_wal = os.path.join(backup_dir, 'wal', 'node') + + try: + self.add_instance(backup_dir, 'node', node) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because page backup should not be possible " + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Instance 'node' WAL archive directory already exists: ", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + try: + self.add_instance(backup_dir, 'node', node) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because page backup should not be possible " + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Instance 'node' WAL archive directory already exists: ", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/locking.py b/tests/locking.py new file mode 100644 index 000000000..2da2415ea --- /dev/null +++ b/tests/locking.py @@ -0,0 +1,540 @@ +import unittest +import os +from time import sleep +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException + + +module_name = 'locking' + + +class LockingTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_locking_running_validate_1(self): + 
""" + make node, take full backup, stop it in the middle + run validate, expect it to successfully executed, + concurrent RUNNING backup with pid file and active process is legal + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + gdb = self.backup_node( + backup_dir, 'node', node, gdb=True) + + gdb.set_breakpoint('backup_non_data_file') + gdb.run_until_break() + + gdb.continue_execution_until_break(20) + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'RUNNING', self.show_pb(backup_dir, 'node')[1]['status']) + + validate_output = self.validate_pb( + backup_dir, options=['--log-level-console=LOG']) + + backup_id = self.show_pb(backup_dir, 'node')[1]['id'] + + self.assertIn( + "is using backup {0} and still is running".format(backup_id), + validate_output, + '\n Unexpected Validate Output: {0}\n'.format(repr(validate_output))) + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'RUNNING', self.show_pb(backup_dir, 'node')[1]['status']) + + # Clean after yourself + # self.del_test_dir(module_name, fname) + + def test_locking_running_validate_2(self): + """ + make node, take full backup, stop it in the middle, + kill process so no cleanup is done - pid file is in place, + run validate, expect it to not successfully executed, + RUNNING backup with pid file AND without active pid is legal, + but his status must be changed to ERROR and pid file is deleted + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + gdb = self.backup_node( + backup_dir, 'node', node, gdb=True) + + gdb.set_breakpoint('backup_non_data_file') + gdb.run_until_break() + + gdb.continue_execution_until_break(20) + + gdb._execute('signal SIGKILL') + gdb.continue_execution_until_error() + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'RUNNING', self.show_pb(backup_dir, 'node')[1]['status']) + + backup_id = self.show_pb(backup_dir, 'node')[1]['id'] + + try: + self.validate_pb(backup_dir) + self.assertEqual( + 1, 0, + "Expecting Error because RUNNING backup is no longer active.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "which used backup {0} no longer exists".format( + backup_id) in e.message and + "Backup {0} has status RUNNING, change it " + "to ERROR and skip validation".format( + backup_id) in e.message and + "WARNING: Some backups are not valid" in + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'ERROR', self.show_pb(backup_dir, 'node')[1]['status']) + + # Clean after yourself + 
self.del_test_dir(module_name, fname) + + def test_locking_running_validate_2_specific_id(self): + """ + make node, take full backup, stop it in the middle, + kill process so no cleanup is done - pid file is in place, + run validate on this specific backup, + expect it to not successfully executed, + RUNNING backup with pid file AND without active pid is legal, + but his status must be changed to ERROR and pid file is deleted + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + gdb = self.backup_node( + backup_dir, 'node', node, gdb=True) + + gdb.set_breakpoint('backup_non_data_file') + gdb.run_until_break() + + gdb.continue_execution_until_break(20) + + gdb._execute('signal SIGKILL') + gdb.continue_execution_until_error() + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'RUNNING', self.show_pb(backup_dir, 'node')[1]['status']) + + backup_id = self.show_pb(backup_dir, 'node')[1]['id'] + + try: + self.validate_pb(backup_dir, 'node', backup_id) + self.assertEqual( + 1, 0, + "Expecting Error because RUNNING backup is no longer active.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "which used backup {0} no longer exists".format( + backup_id) in e.message and + "Backup {0} has status RUNNING, change it " + "to ERROR and skip validation".format( + backup_id) in e.message and + "ERROR: Backup {0} has status: ERROR".format(backup_id) in + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'ERROR', self.show_pb(backup_dir, 'node')[1]['status']) + + try: + self.validate_pb(backup_dir, 'node', backup_id) + self.assertEqual( + 1, 0, + "Expecting Error because backup has status ERROR.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Backup {0} has status: ERROR".format(backup_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + try: + self.validate_pb(backup_dir) + self.assertEqual( + 1, 0, + "Expecting Error because backup has status ERROR.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "WARNING: Backup {0} has status ERROR. 
Skip validation".format( + backup_id) in e.message and + "WARNING: Some backups are not valid" in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_locking_running_3(self): + """ + make node, take full backup, stop it in the middle, + terminate process, delete pid file, + run validate, expect it to not successfully executed, + RUNNING backup without pid file AND without active pid is legal, + his status must be changed to ERROR + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + gdb = self.backup_node( + backup_dir, 'node', node, gdb=True) + + gdb.set_breakpoint('backup_non_data_file') + gdb.run_until_break() + + gdb.continue_execution_until_break(20) + + gdb._execute('signal SIGKILL') + gdb.continue_execution_until_error() + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'RUNNING', self.show_pb(backup_dir, 'node')[1]['status']) + + backup_id = self.show_pb(backup_dir, 'node')[1]['id'] + + os.remove( + os.path.join(backup_dir, 'backups', 'node', backup_id, 'backup.pid')) + + try: + self.validate_pb(backup_dir) + self.assertEqual( + 1, 0, + "Expecting Error because RUNNING backup is no longer active.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "Backup {0} has status RUNNING, change it " + "to ERROR and skip validation".format( + backup_id) in e.message and + "WARNING: Some backups are not valid" in + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'ERROR', self.show_pb(backup_dir, 'node')[1]['status']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_locking_restore_locked(self): + """ + make node, take full backup, take two page backups, + launch validate on PAGE1 and stop it in the middle, + launch restore of PAGE2. 
+ Expect restore to fail because validation of + intermediate backup is impossible + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL + full_id = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE2 + self.backup_node(backup_dir, 'node', node, backup_type='page') + + gdb = self.validate_pb( + backup_dir, 'node', backup_id=backup_id, gdb=True) + + gdb.set_breakpoint('pgBackupValidate') + gdb.run_until_break() + + node.cleanup() + + try: + self.restore_node(backup_dir, 'node', node) + self.assertEqual( + 1, 0, + "Expecting Error because restore without whole chain validation " + "is prohibited unless --no-validate provided.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "ERROR: Cannot lock backup {0} directory\n".format(full_id) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_locking_restore_locked_without_validation(self): + """ + make node, take full backup, take page backup, + launch validate on FULL and stop it in the middle, + launch restore of PAGE. + Expect restore to fail because validation of + intermediate backup is impossible + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL + backup_id = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + restore_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + + gdb = self.validate_pb( + backup_dir, 'node', backup_id=backup_id, gdb=True) + + gdb.set_breakpoint('pgBackupValidate') + gdb.run_until_break() + + node.cleanup() + + try: + self.restore_node( + backup_dir, 'node', node, options=['--no-validate']) + self.assertEqual( + 1, 0, + "Expecting Error because restore without whole chain validation " + "is prohibited unless --no-validate provided.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "Backup {0} is used without validation".format( + restore_id) in e.message and + 'is using backup {0} and still is running'.format( + backup_id) in e.message and + 'ERROR: Cannot lock backup' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_locking_concurrent_validate_and_backup(self): + """ + make node, take full backup, launch validate + and stop it in the middle, take page backup. 
+        Expect PAGE backup to be successfully executed
+        """
+        fname = self.id().split('.')[3]
+        node = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node'),
+            initdb_params=['--data-checksums'])
+
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.slow_start()
+
+        # FULL
+        self.backup_node(backup_dir, 'node', node)
+
+        # PAGE2
+        backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page')
+
+        gdb = self.validate_pb(
+            backup_dir, 'node', backup_id=backup_id, gdb=True)
+
+        gdb.set_breakpoint('pgBackupValidate')
+        gdb.run_until_break()
+
+        # This PAGE backup is expected to be successful
+        self.backup_node(backup_dir, 'node', node, backup_type='page')
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)
+
+    def test_locking_concurrent_restore_and_delete(self):
+        """
+        make node, take full backup, launch restore
+        and stop it in the middle, delete full backup.
+        Expect the delete to fail, because the backup is locked by the restore.
+        """
+        fname = self.id().split('.')[3]
+        node = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node'),
+            initdb_params=['--data-checksums'])
+
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.slow_start()
+
+        # FULL
+        full_id = self.backup_node(backup_dir, 'node', node)
+
+        node.cleanup()
+        gdb = self.restore_node(backup_dir, 'node', node, gdb=True)
+
+        gdb.set_breakpoint('create_data_directories')
+        gdb.run_until_break()
+
+        # This delete is expected to fail, because the backup is locked by restore
+        try:
+            self.delete_pb(backup_dir, 'node', full_id)
+            self.assertEqual(
+                1, 0,
+                "Expecting Error because backup is locked\n "
+                "Output: {0} \n CMD: {1}".format(
+                    repr(self.output), self.cmd))
+        except ProbackupException as e:
+            self.assertIn(
+                "ERROR: Cannot lock backup {0} directory".format(full_id),
+                e.message,
+                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
+                    repr(e.message), self.cmd))
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)
+
+    def test_backup_directory_name(self):
+        """
+        check that show, validate, restore and delete address backups by
+        backup ID, so renaming a backup directory on disk does not break them
+        """
+        fname = self.id().split('.')[3]
+        node = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node'),
+            initdb_params=['--data-checksums'])
+
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.slow_start()
+
+        # FULL
+        full_id_1 = self.backup_node(backup_dir, 'node', node)
+        page_id_1 = self.backup_node(backup_dir, 'node', node, backup_type='page')
+
+        full_id_2 = self.backup_node(backup_dir, 'node', node)
+        page_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')
+
+        node.cleanup()
+
+        old_path = os.path.join(backup_dir, 'backups', 'node', full_id_1)
+        new_path = os.path.join(backup_dir, 'backups', 'node', 'hello_kitty')
+
+        os.rename(old_path, new_path)
+
+        # show, validate, restore and delete must still work by backup ID
+        self.show_pb(backup_dir, 'node', full_id_1)
+
+        self.validate_pb(backup_dir)
+        self.validate_pb(backup_dir, 'node')
+        self.validate_pb(backup_dir, 'node', full_id_1)
+
+        self.restore_node(backup_dir, 'node', node, backup_id=full_id_1)
+
+        self.delete_pb(backup_dir, 'node', full_id_1)
+
+        old_path = os.path.join(backup_dir, 'backups', 'node', full_id_2)
+        new_path =
os.path.join(backup_dir, 'backups', 'node', 'hello_kitty') + + self.set_backup( + backup_dir, 'node', full_id_2, options=['--note=hello']) + + self.merge_backup(backup_dir, 'node', page_id_2, options=["-j", "4"]) + + self.assertNotIn( + 'note', + self.show_pb(backup_dir, 'node', page_id_2)) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/logging.py b/tests/logging.py index e69de29bb..efde1d0b9 100644 --- a/tests/logging.py +++ b/tests/logging.py @@ -0,0 +1,302 @@ +import unittest +import os +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +import datetime + +module_name = 'logging' + + +class LogTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + # PGPRO-2154 + def test_log_rotation(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.set_config( + backup_dir, 'node', + options=['--log-rotation-age=1s', '--log-rotation-size=1MB']) + + self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--log-level-file=verbose']) + + gdb = self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--log-level-file=verbose'], gdb=True) + + gdb.set_breakpoint('open_logfile') + gdb.run_until_break() + gdb.continue_execution_until_exit() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_log_filename_strftime(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.set_config( + backup_dir, 'node', + options=['--log-rotation-age=1d']) + + self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--log-level-file=VERBOSE', + '--log-filename=pg_probackup-%a.log']) + + day_of_week = datetime.datetime.today().strftime("%a") + + path = os.path.join( + backup_dir, 'log', 'pg_probackup-{0}.log'.format(day_of_week)) + + self.assertTrue(os.path.isfile(path)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_truncate_rotation_file(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.set_config( + backup_dir, 'node', + options=['--log-rotation-age=1d']) + + self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--log-level-file=VERBOSE']) + + rotation_file_path = os.path.join( + backup_dir, 'log', 'pg_probackup.log.rotation') + + log_file_path = os.path.join( + backup_dir, 'log', 'pg_probackup.log') + + log_file_size = os.stat(log_file_path).st_size + + self.assertTrue(os.path.isfile(rotation_file_path)) + + # truncate .rotation file + with open(rotation_file_path, "rb+", 0) as f: + f.truncate() + f.flush() + f.close + + output = self.backup_node( + backup_dir, 'node', node, + 
options=[ + '--stream', + '--log-level-file=LOG'], + return_id=False) + + # check that log file wasn`t rotated + self.assertGreater( + os.stat(log_file_path).st_size, + log_file_size) + + self.assertIn( + 'WARNING: cannot read creation timestamp from rotation file', + output) + + output = self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--log-level-file=LOG'], + return_id=False) + + # check that log file wasn`t rotated + self.assertGreater( + os.stat(log_file_path).st_size, + log_file_size) + + self.assertNotIn( + 'WARNING: cannot read creation timestamp from rotation file', + output) + + self.assertTrue(os.path.isfile(rotation_file_path)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_unlink_rotation_file(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.set_config( + backup_dir, 'node', + options=['--log-rotation-age=1d']) + + self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--log-level-file=VERBOSE']) + + rotation_file_path = os.path.join( + backup_dir, 'log', 'pg_probackup.log.rotation') + + log_file_path = os.path.join( + backup_dir, 'log', 'pg_probackup.log') + + log_file_size = os.stat(log_file_path).st_size + + self.assertTrue(os.path.isfile(rotation_file_path)) + + # unlink .rotation file + os.unlink(rotation_file_path) + + output = self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--log-level-file=LOG'], + return_id=False) + + # check that log file wasn`t rotated + self.assertGreater( + os.stat(log_file_path).st_size, + log_file_size) + + self.assertIn( + 'WARNING: missing rotation file:', + output) + + self.assertTrue(os.path.isfile(rotation_file_path)) + + output = self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--log-level-file=VERBOSE'], + return_id=False) + + self.assertNotIn( + 'WARNING: missing rotation file:', + output) + + # check that log file wasn`t rotated + self.assertGreater( + os.stat(log_file_path).st_size, + log_file_size) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_garbage_in_rotation_file(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.set_config( + backup_dir, 'node', + options=['--log-rotation-age=1d']) + + self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--log-level-file=VERBOSE']) + + rotation_file_path = os.path.join( + backup_dir, 'log', 'pg_probackup.log.rotation') + + log_file_path = os.path.join( + backup_dir, 'log', 'pg_probackup.log') + + log_file_size = os.stat(log_file_path).st_size + + self.assertTrue(os.path.isfile(rotation_file_path)) + + # mangle .rotation file + with open(rotation_file_path, "wt", 0) as f: + f.write(b"blah") + f.flush() + f.close + + output = self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--log-level-file=LOG'], + return_id=False) + + # check that log file wasn`t rotated + 
self.assertGreater( + os.stat(log_file_path).st_size, + log_file_size) + + self.assertIn( + 'WARNING: rotation file', + output) + + self.assertIn( + 'has wrong creation timestamp', + output) + + self.assertTrue(os.path.isfile(rotation_file_path)) + + output = self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--log-level-file=LOG'], + return_id=False) + + self.assertNotIn( + 'WARNING: rotation file', + output) + + # check that log file wasn`t rotated + self.assertGreater( + os.stat(log_file_path).st_size, + log_file_size) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/merge.py b/tests/merge.py index 1be3dd8b3..3444056d2 100644 --- a/tests/merge.py +++ b/tests/merge.py @@ -2,14 +2,18 @@ import unittest import os -from .helpers.ptrack_helpers import ProbackupTest +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from testgres import QueryException +import shutil +from datetime import datetime, timedelta +import time module_name = "merge" class MergeTest(ProbackupTest, unittest.TestCase): - def test_merge_full_page(self): + def test_basic_merge_full_page(self): """ Test MERGE command, it merges FULL backup with target PAGE backups """ @@ -18,17 +22,16 @@ def test_merge_full_page(self): # Initialize instance and backup directory node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=["--data-checksums"] - ) + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=["--data-checksums"]) self.init_pb(backup_dir) self.add_instance(backup_dir, "node", node) self.set_archiving(backup_dir, "node", node) - node.start() + node.slow_start() # Do full backup - self.backup_node(backup_dir, "node", node) + self.backup_node(backup_dir, "node", node, options=['--compress']) show_backup = self.show_pb(backup_dir, "node")[0] self.assertEqual(show_backup["status"], "OK") @@ -42,7 +45,7 @@ def test_merge_full_page(self): conn.commit() # Do first page backup - self.backup_node(backup_dir, "node", node, backup_type="page") + self.backup_node(backup_dir, "node", node, backup_type="page", options=['--compress']) show_backup = self.show_pb(backup_dir, "node")[1] # sanity check @@ -57,7 +60,9 @@ def test_merge_full_page(self): conn.commit() # Do second page backup - self.backup_node(backup_dir, "node", node, backup_type="page") + self.backup_node( + backup_dir, "node", node, + backup_type="page", options=['--compress']) show_backup = self.show_pb(backup_dir, "node")[2] page_id = show_backup["id"] @@ -69,7 +74,8 @@ def test_merge_full_page(self): self.assertEqual(show_backup["backup-mode"], "PAGE") # Merge all backups - self.merge_backup(backup_dir, "node", page_id) + self.merge_backup(backup_dir, "node", page_id, + options=["-j", "4"]) show_backups = self.show_pb(backup_dir, "node") # sanity check @@ -94,8 +100,7 @@ def test_merge_full_page(self): self.assertEqual(count1, count2) # Clean after yourself - node.cleanup() - self.del_test_dir(module_name, fname) + self.del_test_dir(module_name, fname, [node]) def test_merge_compressed_backups(self): """ @@ -106,18 +111,16 @@ def test_merge_compressed_backups(self): # Initialize instance and backup directory node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=["--data-checksums"] - ) + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=["--data-checksums"]) self.init_pb(backup_dir) self.add_instance(backup_dir, "node", node) self.set_archiving(backup_dir, "node", node) - 
node.start() + node.slow_start() # Do full compressed backup - self.backup_node(backup_dir, "node", node, options=[ - '--compress-algorithm=zlib']) + self.backup_node(backup_dir, "node", node, options=['--compress']) show_backup = self.show_pb(backup_dir, "node")[0] self.assertEqual(show_backup["status"], "OK") @@ -133,8 +136,7 @@ def test_merge_compressed_backups(self): # Do compressed page backup self.backup_node( - backup_dir, "node", node, backup_type="page", - options=['--compress-algorithm=zlib']) + backup_dir, "node", node, backup_type="page", options=['--compress']) show_backup = self.show_pb(backup_dir, "node")[1] page_id = show_backup["id"] @@ -142,7 +144,7 @@ def test_merge_compressed_backups(self): self.assertEqual(show_backup["backup-mode"], "PAGE") # Merge all backups - self.merge_backup(backup_dir, "node", page_id) + self.merge_backup(backup_dir, "node", page_id, options=['-j2']) show_backups = self.show_pb(backup_dir, "node") self.assertEqual(len(show_backups), 1) @@ -162,281 +164,541 @@ def test_merge_compressed_backups(self): node.cleanup() self.del_test_dir(module_name, fname) - # @unittest.skip("skip") - def test_merge_tablespaces(self): + def test_merge_compressed_backups_1(self): """ - Some test here + Test MERGE command with compressed backups """ + fname = self.id().split(".")[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, "backup") - def test_merge_page_truncate(self): - """ - make node, create table, take full backup, - delete last 3 pages, vacuum relation, - take page backup, merge full and page, - restore last page backup and check data correctness - """ - fname = self.id().split('.')[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + # Initialize instance and backup directory node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', - 'autovacuum': 'off' - } - ) - node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=["--data-checksums"], + pg_options={'autovacuum': 'off'}) self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node_restored.cleanup() - node.start() - self.create_tblspace_in_node(node, 'somedata') + self.add_instance(backup_dir, "node", node) + self.set_archiving(backup_dir, "node", node) + node.slow_start() - node.safe_psql( - "postgres", - "create sequence t_seq; " - "create table t_heap tablespace somedata as select i as id, " - "md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1024) i;") + # Fill with data + node.pgbench_init(scale=10) - node.safe_psql( - "postgres", - "vacuum t_heap") + # Do compressed FULL backup + self.backup_node(backup_dir, "node", node, options=['--compress', '--stream']) + show_backup = self.show_pb(backup_dir, "node")[0] - self.backup_node(backup_dir, 'node', node) + self.assertEqual(show_backup["status"], "OK") + self.assertEqual(show_backup["backup-mode"], "FULL") - node.safe_psql( - "postgres", - "delete from t_heap where ctid >= '(11,0)'") - node.safe_psql( - "postgres", - "vacuum t_heap") + # Change data + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + # Do compressed DELTA backup 
self.backup_node( - backup_dir, 'node', node, backup_type='page') - - if self.paranoia: - pgdata = self.pgdata_content(node.data_dir) + backup_dir, "node", node, + backup_type="delta", options=['--compress', '--stream']) - page_id = self.show_pb(backup_dir, "node")[1]["id"] - self.merge_backup(backup_dir, "node", page_id) + # Change data + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() - self.validate_pb(backup_dir) + # Do compressed PAGE backup + self.backup_node( + backup_dir, "node", node, backup_type="page", options=['--compress']) - old_tablespace = self.get_tblspace_path(node, 'somedata') - new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new') + pgdata = self.pgdata_content(node.data_dir) - self.restore_node( - backup_dir, 'node', node_restored, - options=[ - "-j", "4", - "-T", "{0}={1}".format(old_tablespace, new_tablespace), - "--recovery-target-action=promote"]) + show_backup = self.show_pb(backup_dir, "node")[2] + page_id = show_backup["id"] - # Physical comparison - if self.paranoia: - pgdata_restored = self.pgdata_content(node_restored.data_dir) - self.compare_pgdata(pgdata, pgdata_restored) + self.assertEqual(show_backup["status"], "OK") + self.assertEqual(show_backup["backup-mode"], "PAGE") - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.slow_start() + # Merge all backups + self.merge_backup(backup_dir, "node", page_id, options=['-j2']) + show_backups = self.show_pb(backup_dir, "node") - # Logical comparison - result1 = node.safe_psql( - "postgres", - "select * from t_heap") + self.assertEqual(len(show_backups), 1) + self.assertEqual(show_backups[0]["status"], "OK") + self.assertEqual(show_backups[0]["backup-mode"], "FULL") - result2 = node_restored.safe_psql( - "postgres", - "select * from t_heap") + # Drop node and restore it + node.cleanup() + self.restore_node(backup_dir, 'node', node) - self.assertEqual(result1, result2) + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) # Clean after yourself + node.cleanup() self.del_test_dir(module_name, fname) - def test_merge_delta_truncate(self): + def test_merge_compressed_and_uncompressed_backups(self): """ - make node, create table, take full backup, - delete last 3 pages, vacuum relation, - take page backup, merge full and page, - restore last page backup and check data correctness + Test MERGE command with compressed and uncompressed backups """ - fname = self.id().split('.')[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + fname = self.id().split(".")[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, "backup") + + # Initialize instance and backup directory node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=["--data-checksums"], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', 'autovacuum': 'off' } ) - node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname)) self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node_restored.cleanup() - node.start() - self.create_tblspace_in_node(node, 'somedata') + self.add_instance(backup_dir, "node", node) + self.set_archiving(backup_dir, 
"node", node) + node.slow_start() - node.safe_psql( - "postgres", - "create sequence t_seq; " - "create table t_heap tablespace somedata as select i as id, " - "md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1024) i;") + # Fill with data + node.pgbench_init(scale=10) - node.safe_psql( - "postgres", - "vacuum t_heap") + # Do compressed FULL backup + self.backup_node(backup_dir, "node", node, options=[ + '--compress-algorithm=zlib', '--stream']) + show_backup = self.show_pb(backup_dir, "node")[0] - self.backup_node(backup_dir, 'node', node) + self.assertEqual(show_backup["status"], "OK") + self.assertEqual(show_backup["backup-mode"], "FULL") - node.safe_psql( - "postgres", - "delete from t_heap where ctid >= '(11,0)'") - node.safe_psql( - "postgres", - "vacuum t_heap") + # Change data + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + # Do compressed DELTA backup self.backup_node( - backup_dir, 'node', node, backup_type='delta') - - if self.paranoia: - pgdata = self.pgdata_content(node.data_dir) + backup_dir, "node", node, backup_type="delta", + options=['--compress', '--stream']) - page_id = self.show_pb(backup_dir, "node")[1]["id"] - self.merge_backup(backup_dir, "node", page_id) + # Change data + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() - self.validate_pb(backup_dir) + # Do uncompressed PAGE backup + self.backup_node(backup_dir, "node", node, backup_type="page") - old_tablespace = self.get_tblspace_path(node, 'somedata') - new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new') + pgdata = self.pgdata_content(node.data_dir) - self.restore_node( - backup_dir, 'node', node_restored, - options=[ - "-j", "4", - "-T", "{0}={1}".format(old_tablespace, new_tablespace), - "--recovery-target-action=promote"]) + show_backup = self.show_pb(backup_dir, "node")[2] + page_id = show_backup["id"] - # Physical comparison - if self.paranoia: - pgdata_restored = self.pgdata_content(node_restored.data_dir) - self.compare_pgdata(pgdata, pgdata_restored) + self.assertEqual(show_backup["status"], "OK") + self.assertEqual(show_backup["backup-mode"], "PAGE") - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.slow_start() + # Merge all backups + self.merge_backup(backup_dir, "node", page_id, options=['-j2']) + show_backups = self.show_pb(backup_dir, "node") - # Logical comparison - result1 = node.safe_psql( - "postgres", - "select * from t_heap") + self.assertEqual(len(show_backups), 1) + self.assertEqual(show_backups[0]["status"], "OK") + self.assertEqual(show_backups[0]["backup-mode"], "FULL") - result2 = node_restored.safe_psql( - "postgres", - "select * from t_heap") + # Drop node and restore it + node.cleanup() + self.restore_node(backup_dir, 'node', node) - self.assertEqual(result1, result2) + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) # Clean after yourself + node.cleanup() self.del_test_dir(module_name, fname) - def test_merge_ptrack_truncate(self): + def test_merge_compressed_and_uncompressed_backups_1(self): """ - make node, create table, take full backup, - delete last 3 pages, vacuum relation, - take page backup, merge full and page, - restore last page backup and check data correctness + Test MERGE command with compressed and uncompressed backups """ - fname = self.id().split('.')[3] - backup_dir = os.path.join(self.tmp_path, 
module_name, fname, 'backup') + fname = self.id().split(".")[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, "backup") + + # Initialize instance and backup directory node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=["--data-checksums"], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', 'autovacuum': 'off' } ) - node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname)) self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node_restored.cleanup() - node.start() - self.create_tblspace_in_node(node, 'somedata') + self.add_instance(backup_dir, "node", node) + self.set_archiving(backup_dir, "node", node) + node.slow_start() - node.safe_psql( - "postgres", - "create sequence t_seq; " - "create table t_heap tablespace somedata as select i as id, " - "md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1024) i;") + # Fill with data + node.pgbench_init(scale=5) - node.safe_psql( - "postgres", - "vacuum t_heap") + # Do compressed FULL backup + self.backup_node(backup_dir, "node", node, options=[ + '--compress-algorithm=zlib', '--stream']) + show_backup = self.show_pb(backup_dir, "node")[0] - self.backup_node(backup_dir, 'node', node) + self.assertEqual(show_backup["status"], "OK") + self.assertEqual(show_backup["backup-mode"], "FULL") - node.safe_psql( - "postgres", - "delete from t_heap where ctid >= '(11,0)'") - node.safe_psql( - "postgres", - "vacuum t_heap") + # Change data + pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum']) + pgbench.wait() + # Do uncompressed DELTA backup self.backup_node( - backup_dir, 'node', node, backup_type='delta') + backup_dir, "node", node, backup_type="delta", + options=['--stream']) - if self.paranoia: - pgdata = self.pgdata_content(node.data_dir) + # Change data + pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum']) + pgbench.wait() - page_id = self.show_pb(backup_dir, "node")[1]["id"] - self.merge_backup(backup_dir, "node", page_id) + # Do compressed PAGE backup + self.backup_node( + backup_dir, "node", node, backup_type="page", + options=['--compress-algorithm=zlib']) - self.validate_pb(backup_dir) + pgdata = self.pgdata_content(node.data_dir) - old_tablespace = self.get_tblspace_path(node, 'somedata') - new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new') + show_backup = self.show_pb(backup_dir, "node")[2] + page_id = show_backup["id"] - self.restore_node( - backup_dir, 'node', node_restored, - options=[ - "-j", "4", - "-T", "{0}={1}".format(old_tablespace, new_tablespace), - "--recovery-target-action=promote"]) + self.assertEqual(show_backup["status"], "OK") + self.assertEqual(show_backup["backup-mode"], "PAGE") - # Physical comparison - if self.paranoia: - pgdata_restored = self.pgdata_content(node_restored.data_dir) + # Merge all backups + self.merge_backup(backup_dir, "node", page_id) + show_backups = self.show_pb(backup_dir, "node") + + self.assertEqual(len(show_backups), 1) + self.assertEqual(show_backups[0]["status"], "OK") + self.assertEqual(show_backups[0]["backup-mode"], "FULL") + + # Drop node and restore it + node.cleanup() + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = 
self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + node.cleanup() + self.del_test_dir(module_name, fname) + + def test_merge_compressed_and_uncompressed_backups_2(self): + """ + Test MERGE command with compressed and uncompressed backups + """ + fname = self.id().split(".")[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, "backup") + + # Initialize instance and backup directory + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=["--data-checksums"], + pg_options={ + 'autovacuum': 'off' + } + ) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, "node", node) + self.set_archiving(backup_dir, "node", node) + node.slow_start() + + # Fill with data + node.pgbench_init(scale=20) + + # Do uncompressed FULL backup + self.backup_node(backup_dir, "node", node) + show_backup = self.show_pb(backup_dir, "node")[0] + + self.assertEqual(show_backup["status"], "OK") + self.assertEqual(show_backup["backup-mode"], "FULL") + + # Change data + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # Do compressed DELTA backup + self.backup_node( + backup_dir, "node", node, backup_type="delta", + options=['--compress-algorithm=zlib', '--stream']) + + # Change data + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # Do uncompressed PAGE backup + self.backup_node( + backup_dir, "node", node, backup_type="page") + + pgdata = self.pgdata_content(node.data_dir) + + show_backup = self.show_pb(backup_dir, "node")[2] + page_id = show_backup["id"] + + self.assertEqual(show_backup["status"], "OK") + self.assertEqual(show_backup["backup-mode"], "PAGE") + + # Merge all backups + self.merge_backup(backup_dir, "node", page_id) + show_backups = self.show_pb(backup_dir, "node") + + self.assertEqual(len(show_backups), 1) + self.assertEqual(show_backups[0]["status"], "OK") + self.assertEqual(show_backups[0]["backup-mode"], "FULL") + + # Drop node and restore it + node.cleanup() + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + node.cleanup() + self.del_test_dir(module_name, fname) + + + # @unittest.skip("skip") + def test_merge_tablespaces(self): + """ + Create tablespace with table, take FULL backup, + create another tablespace with another table and drop previous + tablespace, take page backup, merge it and restore + + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off' + } + ) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.create_tblspace_in_node(node, 'somedata') + node.safe_psql( + "postgres", + "create table t_heap tablespace somedata as select i as id," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from generate_series(0,100) i" + ) + # FULL backup + self.backup_node(backup_dir, 'node', node) + + # Create new tablespace + self.create_tblspace_in_node(node, 'somedata1') + + node.safe_psql( + "postgres", + "create table t_heap1 tablespace somedata1 as select i as id," + " md5(i::text) as text, 
md5(i::text)::tsvector as tsvector" + " from generate_series(0,100) i" + ) + + node.safe_psql( + "postgres", + "drop table t_heap" + ) + + # Drop old tablespace + node.safe_psql( + "postgres", + "drop tablespace somedata" + ) + + # PAGE backup + backup_id = self.backup_node(backup_dir, 'node', node, backup_type="page") + + pgdata = self.pgdata_content(node.data_dir) + + node.stop() + shutil.rmtree( + self.get_tblspace_path(node, 'somedata'), + ignore_errors=True) + shutil.rmtree( + self.get_tblspace_path(node, 'somedata1'), + ignore_errors=True) + node.cleanup() + + self.merge_backup(backup_dir, 'node', backup_id) + + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]) + + pgdata_restored = self.pgdata_content(node.data_dir) + + # this compare should fall because we lost some directories + self.compare_pgdata(pgdata, pgdata_restored) + + # @unittest.skip("skip") + def test_merge_tablespaces_1(self): + """ + Create tablespace with table, take FULL backup, + create another tablespace with another table, take page backup, + drop first tablespace and take delta backup, + merge it and restore + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off' + } + ) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.create_tblspace_in_node(node, 'somedata') + + # FULL backup + self.backup_node(backup_dir, 'node', node) + node.safe_psql( + "postgres", + "create table t_heap tablespace somedata as select i as id," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from generate_series(0,100) i" + ) + + # CREATE NEW TABLESPACE + self.create_tblspace_in_node(node, 'somedata1') + + node.safe_psql( + "postgres", + "create table t_heap1 tablespace somedata1 as select i as id," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from generate_series(0,100) i" + ) + + # PAGE backup + self.backup_node(backup_dir, 'node', node, backup_type="page") + + node.safe_psql( + "postgres", + "drop table t_heap" + ) + node.safe_psql( + "postgres", + "drop tablespace somedata" + ) + + # DELTA backup + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta") + + pgdata = self.pgdata_content(node.data_dir) + + node.stop() + shutil.rmtree( + self.get_tblspace_path(node, 'somedata'), + ignore_errors=True) + shutil.rmtree( + self.get_tblspace_path(node, 'somedata1'), + ignore_errors=True) + node.cleanup() + + self.merge_backup(backup_dir, 'node', backup_id) + + self.restore_node( + backup_dir, 'node', node, + options=["-j", "4"]) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_merge_page_truncate(self): + """ + make node, create table, take full backup, + delete last 3 pages, vacuum relation, + take page backup, merge full and page, + restore last page backup and check data correctness + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '300s', + 
'autovacuum': 'off'}) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node_restored.cleanup() + node.slow_start() + self.create_tblspace_in_node(node, 'somedata') + + node.safe_psql( + "postgres", + "create sequence t_seq; " + "create table t_heap tablespace somedata as select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1024) i;") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "delete from t_heap where ctid >= '(11,0)'") + node.safe_psql( + "postgres", + "vacuum t_heap") + + self.backup_node( + backup_dir, 'node', node, backup_type='page') + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + page_id = self.show_pb(backup_dir, "node")[1]["id"] + self.merge_backup(backup_dir, "node", page_id) + + self.validate_pb(backup_dir) + + old_tablespace = self.get_tblspace_path(node, 'somedata') + new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "-T", "{0}={1}".format(old_tablespace, new_tablespace)]) + + # Physical comparison + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) self.compare_pgdata(pgdata, pgdata_restored) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) + self.set_auto_conf(node_restored, {'port': node_restored.port}) node_restored.slow_start() # Logical comparison @@ -452,3 +714,1946 @@ def test_merge_ptrack_truncate(self): # Clean after yourself self.del_test_dir(module_name, fname) + + def test_merge_delta_truncate(self): + """ + make node, create table, take full backup, + delete last 3 pages, vacuum relation, + take page backup, merge full and page, + restore last page backup and check data correctness + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '300s', + 'autovacuum': 'off'}) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node_restored.cleanup() + node.slow_start() + self.create_tblspace_in_node(node, 'somedata') + + node.safe_psql( + "postgres", + "create sequence t_seq; " + "create table t_heap tablespace somedata as select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1024) i;") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "delete from t_heap where ctid >= '(11,0)'") + node.safe_psql( + "postgres", + "vacuum t_heap") + + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + page_id = self.show_pb(backup_dir, "node")[1]["id"] + self.merge_backup(backup_dir, "node", page_id) + + self.validate_pb(backup_dir) + + old_tablespace = self.get_tblspace_path(node, 'somedata') + 
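# A standalone sketch (not part of the diff above) of the tablespace remapping
# that the surrounding restore calls rely on: `pg_probackup restore ... -T OLD=NEW`
# relocates a tablespace recorded in the backup to a new directory.  The binary
# path and directories below are illustrative assumptions, not values from this
# test suite.
import subprocess

def restore_with_remapped_tablespace(
        pg_probackup_bin, backup_dir, instance, target_dir,
        old_tblspc, new_tblspc, jobs=4):
    """Restore `instance` into `target_dir`, relocating one tablespace."""
    cmd = [
        pg_probackup_bin, 'restore',
        '-B', backup_dir,
        '--instance', instance,
        '-D', target_dir,
        '-j', str(jobs),
        # -T maps the old tablespace directory to its new location
        '-T', '{0}={1}'.format(old_tblspc, new_tblspc),
    ]
    return subprocess.check_output(cmd, stderr=subprocess.STDOUT)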
new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "-T", "{0}={1}".format(old_tablespace, new_tablespace)]) + + # Physical comparison + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + + # Logical comparison + result1 = node.safe_psql( + "postgres", + "select * from t_heap") + + result2 = node_restored.safe_psql( + "postgres", + "select * from t_heap") + + self.assertEqual(result1, result2) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_merge_ptrack_truncate(self): + """ + make node, create table, take full backup, + delete last 3 pages, vacuum relation, + take page backup, merge full and page, + restore last page backup and check data correctness + """ + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + ptrack_enable=True, + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.create_tblspace_in_node(node, 'somedata') + + node.safe_psql( + "postgres", + "create sequence t_seq; " + "create table t_heap tablespace somedata as select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1024) i;") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "delete from t_heap where ctid >= '(11,0)'") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='ptrack') + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + self.merge_backup(backup_dir, "node", page_id) + + self.validate_pb(backup_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + old_tablespace = self.get_tblspace_path(node, 'somedata') + new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "-T", "{0}={1}".format(old_tablespace, new_tablespace)]) + + # Physical comparison + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + + # Logical comparison + result1 = node.safe_psql( + "postgres", + "select * from t_heap") + + result2 = node_restored.safe_psql( + "postgres", + "select * from t_heap") + + self.assertEqual(result1, result2) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_merge_delta_delete(self): + """ + Make node, create tablespace with table, take full backup, + alter tablespace location, take delta backup, merge full and delta, + restore database. 
+ """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '30s', + 'autovacuum': 'off' + } + ) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.create_tblspace_in_node(node, 'somedata') + + # FULL backup + self.backup_node(backup_dir, 'node', node, options=["--stream"]) + + node.safe_psql( + "postgres", + "create table t_heap tablespace somedata as select i as id," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from generate_series(0,100) i" + ) + + node.safe_psql( + "postgres", + "delete from t_heap" + ) + + node.safe_psql( + "postgres", + "vacuum t_heap" + ) + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=["--stream"] + ) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + backup_id = self.show_pb(backup_dir, "node")[1]["id"] + self.merge_backup(backup_dir, "node", backup_id, options=["-j", "4"]) + + # RESTORE + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored') + ) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "-T", "{0}={1}".format( + self.get_tblspace_path(node, 'somedata'), + self.get_tblspace_path(node_restored, 'somedata') + ) + ] + ) + + # GET RESTORED PGDATA AND COMPARE + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # START RESTORED NODE + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_continue_failed_merge(self): + """ + Check that failed MERGE can be continued + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join( + module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap as select i as id," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from generate_series(0,1000) i" + ) + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta' + ) + + node.safe_psql( + "postgres", + "delete from t_heap" + ) + + node.safe_psql( + "postgres", + "vacuum t_heap" + ) + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta' + ) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + backup_id = self.show_pb(backup_dir, "node")[2]["id"] + + gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) + + gdb.set_breakpoint('backup_non_data_file_internal') + gdb.run_until_break() + + gdb.continue_execution_until_break(5) + + gdb._execute('signal SIGKILL') + gdb._execute('detach') + time.sleep(1) + + print(self.show_pb(backup_dir, as_text=True, as_json=False)) + + # Try to continue failed MERGE + self.merge_backup(backup_dir, 
"node", backup_id) + + # Drop node and restore it + node.cleanup() + self.restore_node(backup_dir, 'node', node) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_continue_failed_merge_with_corrupted_delta_backup(self): + """ + Fail merge via gdb, corrupt DELTA backup, try to continue merge + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap as select i as id," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from generate_series(0,1000) i") + + old_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + node.safe_psql( + "postgres", + "update t_heap set id = 100500") + + node.safe_psql( + "postgres", + "vacuum full t_heap") + + new_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + # DELTA BACKUP + backup_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + backup_id = self.show_pb(backup_dir, "node")[1]["id"] + + # Failed MERGE + gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) + gdb.set_breakpoint('backup_non_data_file_internal') + gdb.run_until_break() + + gdb.continue_execution_until_break(2) + + gdb._execute('signal SIGKILL') + + # CORRUPT incremental backup + # read block from future + # block_size + backup_header = 8200 + file = os.path.join( + backup_dir, 'backups', 'node', + backup_id_2, 'database', new_path) + with open(file, 'rb') as f: + f.seek(8200) + block_1 = f.read(8200) + f.close + + # write block from future + file = os.path.join( + backup_dir, 'backups', 'node', + backup_id, 'database', old_path) + with open(file, 'r+b') as f: + f.seek(8200) + f.write(block_1) + f.close + + # Try to continue failed MERGE + try: + print(self.merge_backup(backup_dir, "node", backup_id)) + self.assertEqual( + 1, 0, + "Expecting Error because of incremental backup corruption.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "ERROR: Backup {0} has status CORRUPT, merge is aborted".format( + backup_id) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_continue_failed_merge_2(self): + """ + Check that failed MERGE on delete can be continued + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap as select i as id," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from 
generate_series(0,1000) i") + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + node.safe_psql( + "postgres", + "delete from t_heap") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + backup_id = self.show_pb(backup_dir, "node")[2]["id"] + + gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) + + gdb.set_breakpoint('pgFileDelete') + + gdb.run_until_break() + + gdb._execute('thread apply all bt') + + gdb.continue_execution_until_break(20) + + gdb._execute('thread apply all bt') + + gdb._execute('signal SIGKILL') + + print(self.show_pb(backup_dir, as_text=True, as_json=False)) + + backup_id_deleted = self.show_pb(backup_dir, "node")[1]["id"] + + # TODO check that full backup has meta info is equal to DELETTING + + # Try to continue failed MERGE + self.merge_backup(backup_dir, "node", backup_id) + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_continue_failed_merge_3(self): + """ + Check that failed MERGE cannot be continued if intermediate + backup is missing. + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Create test data + node.safe_psql("postgres", "create sequence t_seq") + node.safe_psql( + "postgres", + "create table t_heap as select i as id, nextval('t_seq')" + " as t_seq, md5(i::text) as text, md5(i::text)::tsvector" + " as tsvector from generate_series(0,100000) i" + ) + + # FULL backup + self.backup_node(backup_dir, 'node', node) + + # CREATE FEW PAGE BACKUP + i = 0 + + while i < 2: + + node.safe_psql( + "postgres", + "delete from t_heap" + ) + + node.safe_psql( + "postgres", + "vacuum t_heap" + ) + node.safe_psql( + "postgres", + "insert into t_heap select i as id, nextval('t_seq') as t_seq," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from generate_series(100,200000) i" + ) + + # PAGE BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='page' + ) + i = i + 1 + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + backup_id_merge = self.show_pb(backup_dir, "node")[2]["id"] + backup_id_delete = self.show_pb(backup_dir, "node")[1]["id"] + + print(self.show_pb(backup_dir, as_text=True, as_json=False)) + + gdb = self.merge_backup(backup_dir, "node", backup_id_merge, gdb=True) + + gdb.set_breakpoint('backup_non_data_file_internal') + gdb.run_until_break() + gdb.continue_execution_until_break(2) + + gdb._execute('signal SIGKILL') + + print(self.show_pb(backup_dir, as_text=True, as_json=False)) + # print(os.path.join(backup_dir, "backups", "node", backup_id_delete)) + + # DELETE PAGE1 + shutil.rmtree( + os.path.join(backup_dir, "backups", "node", backup_id_delete)) + + # Try to continue failed MERGE + try: + self.merge_backup(backup_dir, "node", backup_id_merge) + self.assertEqual( + 1, 0, + "Expecting Error because of backup corruption.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "ERROR: Incremental chain is broken, " + "merge is impossible to finish" in 
e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_merge_different_compression_algo(self): + """ + Check that backups with different compression algorithms can be merged + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + self.backup_node( + backup_dir, 'node', node, options=['--compress-algorithm=zlib']) + + node.safe_psql( + "postgres", + "create table t_heap as select i as id," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from generate_series(0,1000) i") + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--compress-algorithm=pglz']) + + node.safe_psql( + "postgres", + "delete from t_heap") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + backup_id = self.show_pb(backup_dir, "node")[2]["id"] + + self.merge_backup(backup_dir, "node", backup_id) + + self.del_test_dir(module_name, fname) + + def test_merge_different_wal_modes(self): + """ + Check that backups with different wal modes can be merged + correctly + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL stream backup + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # DELTA archive backup + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + self.assertEqual( + 'ARCHIVE', self.show_pb(backup_dir, 'node', backup_id)['wal']) + + # DELTA stream backup + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--stream']) + + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + self.assertEqual( + 'STREAM', self.show_pb(backup_dir, 'node', backup_id)['wal']) + + self.del_test_dir(module_name, fname) + + def test_crash_after_opening_backup_control_1(self): + """ + check that crashing after opening backup.control + for writing will not result in losing backup metadata + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL stream backup + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # DELTA archive backup + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + 
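# Sketch of what the MERGING-status assertions further down boil down to: the
# helper show_pb() parses `pg_probackup show --format=json`.  A minimal
# standalone equivalent is given below; it assumes pg_probackup is on PATH and
# that the JSON output is a list of {"instance": ..., "backups": [...]} objects,
# which is the layout this test suite's helper also expects.
import json
import subprocess

def backup_statuses(backup_dir, instance):
    out = subprocess.check_output(
        ['pg_probackup', 'show', '-B', backup_dir,
         '--instance', instance, '--format=json'])
    catalog = json.loads(out.decode('utf-8'))
    for entry in catalog:
        if entry['instance'] == instance:
            # map backup id -> status, e.g. {'PZ7YK2': 'MERGING', ...}
            return {b['id']: b['status'] for b in entry['backups']}
    return {}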
print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) + gdb.set_breakpoint('write_backup_filelist') + gdb.run_until_break() + + gdb.set_breakpoint('write_backup') + gdb.continue_execution_until_break() + gdb.set_breakpoint('pgBackupWriteControl') + gdb.continue_execution_until_break() + + gdb._execute('signal SIGKILL') + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[1]['status']) + + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_crash_after_opening_backup_control_2(self): + """ + check that crashing after opening backup_content.control + for writing will not result in losing metadata about backup files + TODO: rewrite + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Add data + node.pgbench_init(scale=3) + + # FULL backup + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # Change data + pgbench = node.pgbench(options=['-T', '20', '-c', '2']) + pgbench.wait() + + path = node.safe_psql( + 'postgres', + "select pg_relation_filepath('pgbench_accounts')").rstrip() + + fsm_path = path + '_fsm' + + node.safe_psql( + 'postgres', + 'vacuum pgbench_accounts') + + # DELTA backup + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) + gdb.set_breakpoint('write_backup_filelist') + gdb.run_until_break() + + gdb.set_breakpoint('sprintf') + gdb.continue_execution_until_break(1) + + gdb._execute('signal SIGKILL') + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[1]['status']) + + # In to_backup drop file that comes from from_backup + # emulate crash during previous merge + file_to_remove = os.path.join( + backup_dir, 'backups', + 'node', full_id, 'database', fsm_path) + + # print(file_to_remove) + + os.remove(file_to_remove) + + # Continue failed merge + self.merge_backup(backup_dir, "node", backup_id) + + node.cleanup() + + # restore merge backup + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + + self.compare_pgdata(pgdata, pgdata_restored) + + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_losing_file_after_failed_merge(self): + """ + check that crashing after opening backup_content.control + for writing will not result in losing metadata about backup files + TODO: rewrite + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + 
initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Add data + node.pgbench_init(scale=1) + + # FULL backup + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # Change data + node.safe_psql( + 'postgres', + "update pgbench_accounts set aid = aid + 1005000") + + path = node.safe_psql( + 'postgres', + "select pg_relation_filepath('pgbench_accounts')").rstrip() + + node.safe_psql( + 'postgres', + "VACUUM pgbench_accounts") + + vm_path = path + '_vm' + + # DELTA backup + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) + gdb.set_breakpoint('write_backup_filelist') + gdb.run_until_break() + + gdb.set_breakpoint('sprintf') + gdb.continue_execution_until_break(20) + + gdb._execute('signal SIGKILL') + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[1]['status']) + + # In to_backup drop file that comes from from_backup + # emulate crash during previous merge + file_to_remove = os.path.join( + backup_dir, 'backups', + 'node', full_id, 'database', vm_path) + + os.remove(file_to_remove) + + # Try to continue failed MERGE + self.merge_backup(backup_dir, "node", backup_id) + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + self.del_test_dir(module_name, fname) + + def test_failed_merge_after_delete(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # add database + node.safe_psql( + 'postgres', + 'CREATE DATABASE testdb') + + dboid = node.safe_psql( + "postgres", + "select oid from pg_database where datname = 'testdb'").rstrip() + + # take FULL backup + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # drop database + node.safe_psql( + 'postgres', + 'DROP DATABASE testdb') + + # take PAGE backup + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + page_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + gdb = self.merge_backup( + backup_dir, 'node', page_id, + gdb=True, options=['--log-level-console=verbose']) + + gdb.set_breakpoint('delete_backup_files') + gdb.run_until_break() + + gdb.set_breakpoint('pgFileDelete') + gdb.continue_execution_until_break(20) + + gdb._execute('signal SIGKILL') + + # backup half-merged + self.assertEqual( + 'MERGED', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + full_id, self.show_pb(backup_dir, 'node')[0]['id']) + + db_path = os.path.join( + backup_dir, 'backups', 'node', + 
full_id, 'database', 'base', dboid) + + try: + self.merge_backup( + backup_dir, 'node', page_id_2, + options=['--log-level-console=verbose']) + self.assertEqual( + 1, 0, + "Expecting Error because of missing parent.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "ERROR: Full backup {0} has unfinished merge with backup {1}".format( + full_id, page_id) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.del_test_dir(module_name, fname) + + def test_failed_merge_after_delete_1(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # take FULL backup + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + node.pgbench_init(scale=1) + + page_1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGE1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_1, 'ERROR') + + pgdata = self.pgdata_content(node.data_dir) + + # add data + pgbench = node.pgbench(options=['-T', '10', '-c', '2', '--no-vacuum']) + pgbench.wait() + + # take PAGE2 backup + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGE1 backup status to OK + self.change_backup_status(backup_dir, 'node', page_1, 'OK') + + gdb = self.merge_backup( + backup_dir, 'node', page_id, + gdb=True, options=['--log-level-console=verbose']) + + gdb.set_breakpoint('delete_backup_files') + gdb.run_until_break() + +# gdb.set_breakpoint('parray_bsearch') +# gdb.continue_execution_until_break() + + gdb.set_breakpoint('pgFileDelete') + gdb.continue_execution_until_break(30) + gdb._execute('signal SIGKILL') + + self.assertEqual( + full_id, self.show_pb(backup_dir, 'node')[0]['id']) + + # restore + node.cleanup() + try: + #self.restore_node(backup_dir, 'node', node, backup_id=page_1) + self.restore_node(backup_dir, 'node', node) + self.assertEqual( + 1, 0, + "Expecting Error because of orphan status.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Backup {0} is orphan".format(page_1), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.del_test_dir(module_name, fname) + + def test_failed_merge_after_delete_2(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # take FULL backup + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + node.pgbench_init(scale=1) + + page_1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # add data + pgbench = node.pgbench(options=['-T', '10', '-c', '2', '--no-vacuum']) + pgbench.wait() + + # take 
PAGE2 backup + page_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + gdb = self.merge_backup( + backup_dir, 'node', page_2, gdb=True, + options=['--log-level-console=VERBOSE']) + + gdb.set_breakpoint('pgFileDelete') + gdb.run_until_break() + gdb.continue_execution_until_break(2) + gdb._execute('signal SIGKILL') + + self.delete_pb(backup_dir, 'node', backup_id=page_2) + + # rerun merge + try: + #self.restore_node(backup_dir, 'node', node, backup_id=page_1) + self.merge_backup(backup_dir, 'node', page_1) + self.assertEqual( + 1, 0, + "Expecting Error because of backup is missing.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Full backup {0} has unfinished merge " + "with backup {1}".format(full_id, page_2), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.del_test_dir(module_name, fname) + + def test_failed_merge_after_delete_3(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # add database + node.safe_psql( + 'postgres', + 'CREATE DATABASE testdb') + + dboid = node.safe_psql( + "postgres", + "select oid from pg_database where datname = 'testdb'").rstrip() + + # take FULL backup + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # drop database + node.safe_psql( + 'postgres', + 'DROP DATABASE testdb') + + # take PAGE backup + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # create database + node.safe_psql( + 'postgres', + 'create DATABASE testdb') + + page_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + gdb = self.merge_backup( + backup_dir, 'node', page_id, + gdb=True, options=['--log-level-console=verbose']) + + gdb.set_breakpoint('delete_backup_files') + gdb.run_until_break() + + gdb.set_breakpoint('pgFileDelete') + gdb.continue_execution_until_break(20) + + gdb._execute('signal SIGKILL') + + # backup half-merged + self.assertEqual( + 'MERGED', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + full_id, self.show_pb(backup_dir, 'node')[0]['id']) + + db_path = os.path.join( + backup_dir, 'backups', 'node', full_id) + + # FULL backup is missing now + shutil.rmtree(db_path) + + try: + self.merge_backup( + backup_dir, 'node', page_id_2, + options=['--log-level-console=verbose']) + self.assertEqual( + 1, 0, + "Expecting Error because of missing parent.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "ERROR: Failed to find parent full backup for {0}".format( + page_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_merge_backup_from_future(self): + """ + take FULL backup, table PAGE backup from future, + try to merge page with FULL + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + 
initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node(backup_dir, 'node', node) + + node.pgbench_init(scale=5) + + # Take PAGE from future + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + with open( + os.path.join( + backup_dir, 'backups', 'node', + backup_id, "backup.control"), "a") as conf: + conf.write("start-time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() + timedelta(days=3))) + + # rename directory + new_id = self.show_pb(backup_dir, 'node')[1]['id'] + + os.rename( + os.path.join(backup_dir, 'backups', 'node', backup_id), + os.path.join(backup_dir, 'backups', 'node', new_id)) + + pgbench = node.pgbench(options=['-T', '5', '-c', '1', '--no-vacuum']) + pgbench.wait() + + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + pgdata = self.pgdata_content(node.data_dir) + + result = node.safe_psql( + 'postgres', + 'SELECT * from pgbench_accounts') + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', + node_restored, backup_id=backup_id) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # check that merged backup has the same state as + node_restored.cleanup() + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + self.restore_node( + backup_dir, 'node', + node_restored, backup_id=backup_id) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + + self.set_auto_conf( + node_restored, + {'port': node_restored.port}) + node_restored.slow_start() + + result_new = node_restored.safe_psql( + 'postgres', + 'SELECT * from pgbench_accounts') + + self.assertTrue(result, result_new) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_merge_multiple_descendants(self): + """ + PAGEb3 + | PAGEa3 + PAGEb2 / + | PAGEa2 / + PAGEb1 \ / + | PAGEa1 + FULLb | + FULLa + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULLb backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change FULLb backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + # Change PAGEa1 to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # Change 
PAGEb1 and FULLb to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEb1 and FULLb to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa2 and FULL to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + # Change PAGEb2, PAGEb1 and FULLb to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b2, 'ERROR') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # Change FULLa to OK + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa3 OK + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEa3 and FULLa to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a3, 'ERROR') + + # Change PAGEb2, PAGEb1 and FULLb to OK + self.change_backup_status(backup_dir, 'node', page_id_b2, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb3 OK + # PAGEa3 ERROR + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + # Change PAGEa3, PAGEa2 and FULLa status to OK + self.change_backup_status(backup_dir, 'node', page_id_a3, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + + # PAGEb3 OK + # PAGEa3 OK + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Check that page_id_a3 and page_id_a2 are both direct descendants of page_id_a1 + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id=page_id_a3)['parent-backup-id'], + page_id_a1) + + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id=page_id_a2)['parent-backup-id'], + page_id_a1) + + self.merge_backup( + backup_dir, 'node', page_id_a2, + options=['--merge-expired', '--log-level-console=log']) + + try: + self.merge_backup( + backup_dir, 'node', page_id_a3, + options=['--merge-expired', '--log-level-console=log']) + self.assertEqual( + 1, 0, + "Expecting Error because of parent FULL backup is missing.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "ERROR: Failed to find parent full backup for {0}".format( + page_id_a3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # 
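# Sketch only: the parent/descendant bookkeeping exercised above can be walked
# directly from the `show` metadata.  `backups` is assumed to be the list of
# dicts returned by self.show_pb(backup_dir, 'node'); the keys 'id',
# 'backup-mode' and 'parent-backup-id' are the ones the assertions above
# already rely on.
def full_ancestor(backups, backup_id):
    """Return the id of the FULL backup that anchors backup_id's chain."""
    by_id = {b['id']: b for b in backups}
    current = by_id[backup_id]
    while current.get('backup-mode') != 'FULL':
        parent_id = current.get('parent-backup-id')
        if parent_id not in by_id:
            # mirrors the "Failed to find parent full backup" error path
            raise LookupError('incremental chain is broken for ' + backup_id)
        current = by_id[parent_id]
    return current['id']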
@unittest.skip("skip") + def test_smart_merge(self): + """ + make node, create database, take full backup, drop database, + take PAGE backup and merge it into FULL, + make sure that files from dropped database are not + copied during restore + https://github.com/postgrespro/pg_probackup/issues/63 + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # create database + node.safe_psql( + "postgres", + "CREATE DATABASE testdb") + + # take FULL backup + full_id = self.backup_node(backup_dir, 'node', node) + + # drop database + node.safe_psql( + "postgres", + "DROP DATABASE testdb") + + # take PAGE backup + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # get delta between FULL and PAGE filelists + filelist_full = self.get_backup_filelist( + backup_dir, 'node', full_id) + + filelist_page = self.get_backup_filelist( + backup_dir, 'node', page_id) + + filelist_diff = self.get_backup_filelist_diff( + filelist_full, filelist_page) + + # merge PAGE backup + self.merge_backup( + backup_dir, 'node', page_id, + options=['--log-level-file=VERBOSE']) + + logfile = os.path.join(backup_dir, 'log', 'pg_probackup.log') + with open(logfile, 'r') as f: + logfile_content = f.read() + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + def test_idempotent_merge(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # add database + node.safe_psql( + 'postgres', + 'CREATE DATABASE testdb') + + # take FULL backup + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # create database + node.safe_psql( + 'postgres', + 'create DATABASE testdb1') + + # take PAGE backup + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # create database + node.safe_psql( + 'postgres', + 'create DATABASE testdb2') + + page_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + gdb = self.merge_backup( + backup_dir, 'node', page_id_2, + gdb=True, options=['--log-level-console=verbose']) + + gdb.set_breakpoint('delete_backup_files') + gdb.run_until_break() + gdb.remove_all_breakpoints() + + gdb.set_breakpoint('rename') + gdb.continue_execution_until_break() + gdb.continue_execution_until_break(2) + + gdb._execute('signal SIGKILL') + + show_backups = self.show_pb(backup_dir, "node") + self.assertEqual(len(show_backups), 1) + + self.assertEqual( + 'MERGED', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + full_id, self.show_pb(backup_dir, 'node')[0]['id']) + + self.merge_backup(backup_dir, 'node', page_id_2) + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + page_id_2, self.show_pb(backup_dir, 'node')[0]['id']) + + self.del_test_dir(module_name, fname, [node]) + + def 
test_merge_correct_inheritance(self): + """ + Make sure that backup metainformation fields + 'note' and 'expire-time' are correctly inherited + during merge + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # add database + node.safe_psql( + 'postgres', + 'CREATE DATABASE testdb') + + # take FULL backup + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + # create database + node.safe_psql( + 'postgres', + 'create DATABASE testdb1') + + # take PAGE backup + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + self.set_backup( + backup_dir, 'node', page_id, options=['--note=hello', '--ttl=20d']) + + page_meta = self.show_pb(backup_dir, 'node', page_id) + + self.merge_backup(backup_dir, 'node', page_id) + + print(self.show_pb(backup_dir, 'node', page_id)) + + self.assertEqual( + page_meta['note'], + self.show_pb(backup_dir, 'node', page_id)['note']) + + self.assertEqual( + page_meta['expire-time'], + self.show_pb(backup_dir, 'node', page_id)['expire-time']) + + self.del_test_dir(module_name, fname, [node]) + + def test_merge_correct_inheritance_1(self): + """ + Make sure that backup metainformation fields + 'note' and 'expire-time' are correctly inherited + during merge + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # add database + node.safe_psql( + 'postgres', + 'CREATE DATABASE testdb') + + # take FULL backup + self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--note=hello', '--ttl=20d']) + + # create database + node.safe_psql( + 'postgres', + 'create DATABASE testdb1') + + # take PAGE backup + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + self.merge_backup(backup_dir, 'node', page_id) + + self.assertNotIn( + 'note', + self.show_pb(backup_dir, 'node', page_id)) + + self.assertNotIn( + 'expire-time', + self.show_pb(backup_dir, 'node', page_id)) + + self.del_test_dir(module_name, fname, [node]) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_multi_timeline_merge(self): + """ + Check that backup in PAGE mode choose + parent backup correctly: + t12 /---P--> + ... 
+ t3 /----> + t2 /----> + t1 -F-----D-> + + P must have F as parent + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql("postgres", "create extension pageinspect") + + try: + node.safe_psql( + "postgres", + "create extension amcheck") + except QueryException as e: + node.safe_psql( + "postgres", + "create extension amcheck_next") + + node.pgbench_init(scale=20) + full_id = self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + self.backup_node(backup_dir, 'node', node, backup_type='delta') + + node.cleanup() + self.restore_node( + backup_dir, 'node', node, backup_id=full_id, + options=[ + '--recovery-target=immediate', + '--recovery-target-action=promote']) + + node.slow_start() + + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # create timelines + for i in range(2, 7): + node.cleanup() + self.restore_node( + backup_dir, 'node', node, + options=[ + '--recovery-target=latest', + '--recovery-target-action=promote', + '--recovery-target-timeline={0}'.format(i)]) + node.slow_start() + + # at this point there is i+1 timeline + pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # create backup at 2, 4 and 6 timeline + if i % 2 == 0: + self.backup_node(backup_dir, 'node', node, backup_type='page') + + page_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + pgdata = self.pgdata_content(node.data_dir) + + self.merge_backup(backup_dir, 'node', page_id) + + result = node.safe_psql( + "postgres", "select * from pgbench_accounts") + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node(backup_dir, 'node', node_restored) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + + result_new = node_restored.safe_psql( + "postgres", "select * from pgbench_accounts") + + self.assertEqual(result, result_new) + + self.compare_pgdata(pgdata, pgdata_restored) + + self.checkdb_node( + backup_dir, + 'node', + options=[ + '--amcheck', + '-d', 'postgres', '-p', str(node.port)]) + + self.checkdb_node( + backup_dir, + 'node', + options=[ + '--amcheck', + '-d', 'postgres', '-p', str(node_restored.port)]) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node, node_restored]) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_merge_page_header_map_retry(self): + """ + page header map cannot be trusted when + running retry + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=20) + self.backup_node(backup_dir, 'node', node, 
options=['--stream']) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + delta_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + gdb = self.merge_backup(backup_dir, 'node', delta_id, gdb=True) + + # our goal here is to get full backup with merged data files, + # but with old page header map + gdb.set_breakpoint('cleanup_header_map') + gdb.run_until_break() + gdb._execute('signal SIGKILL') + + self.merge_backup(backup_dir, 'node', delta_id) + + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + +# 1. Need new test with corrupted FULL backup +# 2. different compression levels diff --git a/tests/option_test.py b/tests/option.py similarity index 77% rename from tests/option_test.py rename to tests/option.py index 8bd473fa9..023a0c2c6 100644 --- a/tests/option_test.py +++ b/tests/option.py @@ -12,9 +12,6 @@ class OptionTest(ProbackupTest, unittest.TestCase): # @unittest.expectedFailure def test_help_1(self): """help options""" - self.maxDiff = None - fname = self.id().split(".")[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') with open(os.path.join(self.dir_path, "expected/option_help.out"), "rb") as help_out: self.assertEqual( self.run_pb(["--help"]), @@ -24,8 +21,6 @@ def test_help_1(self): # @unittest.skip("skip") def test_version_2(self): """help options""" - fname = self.id().split(".")[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') with open(os.path.join(self.dir_path, "expected/option_version.out"), "rb") as version_out: self.assertIn( version_out.read().decode("utf-8"), @@ -35,14 +30,14 @@ def test_version_2(self): # @unittest.skip("skip") def test_without_backup_path_3(self): """backup command failure without backup mode option""" - fname = self.id().split(".")[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') try: self.run_pb(["backup", "-b", "full"]) self.assertEqual(1, 0, "Expecting Error because '-B' parameter is not specified.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, 'ERROR: required parameter not specified: BACKUP_PATH (-B, --backup-path)\n', + self.assertIn( + 'ERROR: required parameter not specified: BACKUP_PATH (-B, --backup-path)', + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) @@ -52,7 +47,7 @@ def test_options_4(self): fname = self.id().split(".")[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'node')) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) @@ -63,8 +58,9 @@ def test_options_4(self): self.assertEqual(1, 0, "Expecting Error because 'instance' parameter is not specified.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - 'ERROR: required parameter not specified: --instance\n', + self.assertIn( + 'ERROR: required parameter not specified: --instance', + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) # backup command failure without backup mode 
option @@ -73,7 +69,8 @@ def test_options_4(self): self.assertEqual(1, 0, "Expecting Error because '-b' parameter is not specified.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertIn('ERROR: required parameter not specified: BACKUP_MODE (-b, --backup-mode)', + self.assertIn( + 'ERROR: required parameter not specified: BACKUP_MODE (-b, --backup-mode)', e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) @@ -83,8 +80,9 @@ def test_options_4(self): self.assertEqual(1, 0, "Expecting Error because backup-mode parameter is invalid.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - 'ERROR: invalid backup-mode "bad"\n', + self.assertIn( + 'ERROR: invalid backup-mode "bad"', + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) # delete failure without delete options @@ -94,8 +92,10 @@ def test_options_4(self): self.assertEqual(1, 0, "Expecting Error because delete options are omitted.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - 'ERROR: You must specify at least one of the delete options: --expired |--wal |--backup_id\n', + self.assertIn( + 'ERROR: You must specify at least one of the delete options: ' + '--delete-expired |--delete-wal |--merge-expired |--status |(-i, --backup-id)', + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) @@ -106,7 +106,9 @@ def test_options_4(self): self.assertEqual(1, 0, "Expecting Error because backup ID is omitted.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertTrue("option requires an argument -- 'i'" in e.message, + self.assertIn( + "option requires an argument -- 'i'", + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) # Clean after yourself @@ -118,19 +120,23 @@ def test_options_5(self): fname = self.id().split(".")[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - pg_options={ - 'wal_level': 'logical', - 'max_wal_senders': '2'}) + base_dir=os.path.join(module_name, fname, 'node')) + + output = self.init_pb(backup_dir) + self.assertIn( + "INFO: Backup catalog", + output) - self.assertEqual("INFO: Backup catalog '{0}' successfully inited\n".format(backup_dir), - self.init_pb(backup_dir)) + self.assertIn( + "successfully inited", + output) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() # syntax error in pg_probackup.conf - with open(os.path.join(backup_dir, "backups", "node", "pg_probackup.conf"), "a") as conf: + conf_file = os.path.join(backup_dir, "backups", "node", "pg_probackup.conf") + with open(conf_file, "a") as conf: conf.write(" = INFINITE\n") try: self.backup_node(backup_dir, 'node', node) @@ -138,8 +144,9 @@ def test_options_5(self): self.assertEqual(1, 0, "Expecting Error because of garbage in pg_probackup.conf.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - 'ERROR: syntax error in " = INFINITE"\n', + self.assertIn( + 'ERROR: Syntax error in " = INFINITE', + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) self.clean_pb(backup_dir) @@ -147,7 
+154,7 @@ def test_options_5(self): self.add_instance(backup_dir, 'node', node) # invalid value in pg_probackup.conf - with open(os.path.join(backup_dir, "backups", "node", "pg_probackup.conf"), "a") as conf: + with open(conf_file, "a") as conf: conf.write("BACKUP_MODE=\n") try: @@ -156,8 +163,9 @@ def test_options_5(self): self.assertEqual(1, 0, "Expecting Error because of invalid backup-mode in pg_probackup.conf.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - 'ERROR: invalid backup-mode ""\n', + self.assertIn( + 'ERROR: Invalid option "BACKUP_MODE" in file', + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) self.clean_pb(backup_dir) @@ -165,7 +173,7 @@ def test_options_5(self): self.add_instance(backup_dir, 'node', node) # Command line parameters should override file values - with open(os.path.join(backup_dir, "backups", "node", "pg_probackup.conf"), "a") as conf: + with open(conf_file, "a") as conf: conf.write("retention-redundancy=1\n") self.assertEqual(self.show_config(backup_dir, 'node')['retention-redundancy'], '1') @@ -177,12 +185,13 @@ def test_options_5(self): self.assertEqual(1, 0, "Expecting Error because option system-identifier cannot be specified in command line.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - 'ERROR: option system-identifier cannot be specified in command line\n', + self.assertIn( + 'ERROR: Option system-identifier cannot be specified in command line', + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) # invalid value in pg_probackup.conf - with open(os.path.join(backup_dir, "backups", "node", "pg_probackup.conf"), "a") as conf: + with open(conf_file, "a") as conf: conf.write("SMOOTH_CHECKPOINT=FOO\n") try: @@ -191,8 +200,9 @@ def test_options_5(self): self.assertEqual(1, 0, "Expecting Error because option -C should be boolean.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - "ERROR: option -C, --smooth-checkpoint should be a boolean: 'FOO'\n", + self.assertIn( + 'ERROR: Invalid option "SMOOTH_CHECKPOINT" in file', + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) self.clean_pb(backup_dir) @@ -200,8 +210,7 @@ def test_options_5(self): self.add_instance(backup_dir, 'node', node) # invalid option in pg_probackup.conf - pbconf_path = os.path.join(backup_dir, "backups", "node", "pg_probackup.conf") - with open(pbconf_path, "a") as conf: + with open(conf_file, "a") as conf: conf.write("TIMELINEID=1\n") try: @@ -210,8 +219,9 @@ def test_options_5(self): self.assertEqual(1, 0, 'Expecting Error because of invalid option "TIMELINEID".\n Output: {0} \n CMD: {1}'.format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual(e.message, - 'ERROR: invalid option "TIMELINEID" in file "{0}"\n'.format(pbconf_path), + self.assertIn( + 'ERROR: Invalid option "TIMELINEID" in file', + e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) # Clean after yourself diff --git a/tests/page.py b/tests/page.py index ef7122b68..201f825e8 100644 --- a/tests/page.py +++ b/tests/page.py @@ -1,16 +1,19 @@ import os import unittest from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from testgres import QueryException from datetime import 
datetime, timedelta import subprocess +import gzip +import shutil module_name = 'page' -class PageBackupTest(ProbackupTest, unittest.TestCase): +class PageTest(ProbackupTest, unittest.TestCase): # @unittest.skip("skip") - def test_page_vacuum_truncate(self): + def test_basic_page_vacuum_truncate(self): """ make node, create table, take full backup, delete last 3 pages, vacuum relation, @@ -20,24 +23,21 @@ def test_page_vacuum_truncate(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '300s', - 'autovacuum': 'off' - } - ) + 'autovacuum': 'off'}) + node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'node_restored')) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) node_restored.cleanup() - node.start() + node.slow_start() self.create_tblspace_in_node(node, 'somedata') node.safe_psql( @@ -54,6 +54,7 @@ def test_page_vacuum_truncate(self): self.backup_node(backup_dir, 'node', node) + # TODO: make it dynamic node.safe_psql( "postgres", "delete from t_heap where ctid >= '(11,0)'") @@ -62,8 +63,7 @@ def test_page_vacuum_truncate(self): "vacuum t_heap") self.backup_node( - backup_dir, 'node', node, backup_type='page', - options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='page') self.backup_node( backup_dir, 'node', node, backup_type='page') @@ -78,16 +78,14 @@ def test_page_vacuum_truncate(self): backup_dir, 'node', node_restored, options=[ "-j", "4", - "-T", "{0}={1}".format(old_tablespace, new_tablespace), - "--recovery-target-action=promote"]) + "-T", "{0}={1}".format(old_tablespace, new_tablespace)]) # Physical comparison if self.paranoia: pgdata_restored = self.pgdata_content(node_restored.data_dir) self.compare_pgdata(pgdata, pgdata_restored) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) + self.set_auto_conf(node_restored, {'port': node_restored.port}) node_restored.slow_start() # Logical comparison @@ -101,6 +99,80 @@ def test_page_vacuum_truncate(self): self.assertEqual(result1, result2) + # Clean after yourself + self.del_test_dir(module_name, fname, [node, node_restored]) + + # @unittest.skip("skip") + def test_page_vacuum_truncate_1(self): + """ + make node, create table, take full backup, + delete all data, vacuum relation, + take page backup, insert some data, + take second page backup and check data correctness + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create sequence t_seq; " + "create table t_heap as select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1024) i") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + 
self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "delete from t_heap") + + node.safe_psql( + "postgres", + "vacuum t_heap") + + self.backup_node( + backup_dir, 'node', node, backup_type='page') + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1) i") + + self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgdata = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node(backup_dir, 'node', node_restored) + + # Physical comparison + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + # Clean after yourself self.del_test_dir(module_name, fname) @@ -114,19 +186,17 @@ def test_page_stream(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s'} ) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -149,7 +219,10 @@ def test_page_stream(self): page_result = node.execute("postgres", "SELECT * FROM t_heap") page_backup_id = self.backup_node( backup_dir, 'node', node, - backup_type='page', options=['--stream']) + backup_type='page', options=['--stream', '-j', '4']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) # Drop Node node.cleanup() @@ -162,6 +235,7 @@ def test_page_stream(self): backup_id=full_backup_id, options=["-j", "4"]), '\n Unexpected Error Message: {0}\n' ' CMD: {1}'.format(repr(self.output), self.cmd)) + node.slow_start() full_result_new = node.execute("postgres", "SELECT * FROM t_heap") self.assertEqual(full_result, full_result_new) @@ -175,6 +249,12 @@ def test_page_stream(self): backup_id=page_backup_id, options=["-j", "4"]), '\n Unexpected Error Message: {0}\n' ' CMD: {1}'.format(repr(self.output), self.cmd)) + + # GET RESTORED PGDATA AND COMPARE + if self.paranoia: + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + node.slow_start() page_result_new = node.execute("postgres", "SELECT * FROM t_heap") self.assertEqual(page_result, page_result_new) @@ -193,25 +273,23 @@ def test_page_archive(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s'} ) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( "postgres", "create table t_heap as select i as id, md5(i::text) as text, " - "md5(i::text)::tsvector as tsvector from 
generate_series(0,1) i") + "md5(i::text)::tsvector as tsvector from generate_series(0,100) i") full_result = node.execute("postgres", "SELECT * FROM t_heap") full_backup_id = self.backup_node( backup_dir, 'node', node, backup_type='full') @@ -221,10 +299,14 @@ def test_page_archive(self): "postgres", "insert into t_heap select i as id, " "md5(i::text) as text, md5(i::text)::tsvector as tsvector " - "from generate_series(0,2) i") + "from generate_series(100, 200) i") page_result = node.execute("postgres", "SELECT * FROM t_heap") page_backup_id = self.backup_node( - backup_dir, 'node', node, backup_type='page') + backup_dir, 'node', node, + backup_type='page', options=["-j", "4"]) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) # Drop Node node.cleanup() @@ -241,6 +323,7 @@ def test_page_archive(self): "--recovery-target-action=promote"]), '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(self.output), self.cmd)) + node.slow_start() full_result_new = node.execute("postgres", "SELECT * FROM t_heap") @@ -259,6 +342,12 @@ def test_page_archive(self): "--recovery-target-action=promote"]), '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(self.output), self.cmd)) + + # GET RESTORED PGDATA AND COMPARE + if self.paranoia: + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + node.slow_start() page_result_new = node.execute("postgres", "SELECT * FROM t_heap") @@ -277,24 +366,20 @@ def test_page_multiple_segments(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'fsync': 'off', 'shared_buffers': '1GB', 'maintenance_work_mem': '1GB', 'autovacuum': 'off', - 'full_page_writes': 'off' - } - ) + 'full_page_writes': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() self.create_tblspace_in_node(node, 'somedata') @@ -306,20 +391,18 @@ def test_page_multiple_segments(self): # PGBENCH STUFF pgbench = node.pgbench(options=['-T', '50', '-c', '1', '--no-vacuum']) pgbench.wait() - node.safe_psql("postgres", "checkpoint") # GET LOGICAL CONTENT FROM NODE result = node.safe_psql("postgres", "select * from pgbench_accounts") # PAGE BACKUP - self.backup_node( - backup_dir, 'node', node, backup_type='page', - options=["--log-level-file=verbose"]) + self.backup_node(backup_dir, 'node', node, backup_type='page') + # GET PHYSICAL CONTENT FROM NODE pgdata = self.pgdata_content(node.data_dir) # RESTORE NODE restored_node = self.make_simple_node( - base_dir="{0}/{1}/restored_node".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'restored_node')) restored_node.cleanup() tblspc_path = self.get_tblspace_path(node, 'somedata') tblspc_path_new = self.get_tblspace_path( @@ -329,15 +412,13 @@ def test_page_multiple_segments(self): backup_dir, 'node', restored_node, options=[ "-j", "4", - "--recovery-target-action=promote", "-T", "{0}={1}".format(tblspc_path, tblspc_path_new)]) # GET PHYSICAL CONTENT FROM NODE_RESTORED pgdata_restored = self.pgdata_content(restored_node.data_dir) # START RESTORED NODE - restored_node.append_conf( - "postgresql.auto.conf", "port = {0}".format(restored_node.port)) + 
self.set_auto_conf(restored_node, {'port': restored_node.port}) restored_node.slow_start() result_new = restored_node.safe_psql( @@ -362,11 +443,9 @@ def test_page_delete(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', 'autovacuum': 'off' } @@ -375,7 +454,7 @@ def test_page_delete(self): self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() self.create_tblspace_in_node(node, 'somedata') # FULL backup @@ -384,18 +463,15 @@ def test_page_delete(self): "postgres", "create table t_heap tablespace somedata as select i as id," " md5(i::text) as text, md5(i::text)::tsvector as tsvector" - " from generate_series(0,100) i" - ) + " from generate_series(0,100) i") node.safe_psql( "postgres", - "delete from t_heap" - ) + "delete from t_heap") node.safe_psql( "postgres", - "vacuum t_heap" - ) + "vacuum t_heap") # PAGE BACKUP self.backup_node( @@ -405,8 +481,7 @@ def test_page_delete(self): # RESTORE node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname) - ) + base_dir=os.path.join(module_name, fname, 'node_restored')) node_restored.cleanup() self.restore_node( @@ -425,9 +500,8 @@ def test_page_delete(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() # Clean after yourself self.del_test_dir(module_name, fname) @@ -442,11 +516,10 @@ def test_page_delete_1(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, initdb_params=['--data-checksums'], + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', 'autovacuum': 'off' } @@ -455,7 +528,7 @@ def test_page_delete_1(self): self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() self.create_tblspace_in_node(node, 'somedata') @@ -486,7 +559,7 @@ def test_page_delete_1(self): # RESTORE node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname) + base_dir=os.path.join(module_name, fname, 'node_restored') ) node_restored.cleanup() @@ -506,9 +579,8 @@ def test_page_delete_1(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() # Clean after yourself self.del_test_dir(module_name, fname) @@ -522,21 +594,21 @@ def test_parallel_pagemap(self): # Initialize instance and backup directory node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + 
base_dir=os.path.join(module_name, fname, 'node'), initdb_params=['--data-checksums'], pg_options={ "hot_standby": "on" } ) node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node_restored'), ) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) node_restored.cleanup() self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # Do full backup self.backup_node(backup_dir, 'node', node) @@ -574,9 +646,8 @@ def test_parallel_pagemap(self): pgdata_restored = self.pgdata_content(node_restored.data_dir) self.compare_pgdata(pgdata, pgdata_restored) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() # Check restored node count2 = node_restored.execute("postgres", "select count(*) from test") @@ -597,7 +668,7 @@ def test_parallel_pagemap_1(self): # Initialize instance and backup directory node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), initdb_params=['--data-checksums'], pg_options={} ) @@ -605,7 +676,7 @@ def test_parallel_pagemap_1(self): self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # Do full backup self.backup_node(backup_dir, 'node', node) @@ -634,8 +705,807 @@ def test_parallel_pagemap_1(self): # Drop node and restore it node.cleanup() self.restore_node(backup_dir, 'node', node) - node.start() + node.slow_start() + + # Clean after yourself + node.cleanup() + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_page_backup_with_lost_wal_segment(self): + """ + make node with archiving + make archive backup, then generate some wals with pgbench, + delete latest archived wal segment + run page backup, expecting error because of missing wal segment + make sure that backup status is 'ERROR' + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + # make some wals + node.pgbench_init(scale=3) + + # delete last wal segment + wals_dir = os.path.join(backup_dir, 'wal', 'node') + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join( + wals_dir, f)) and not f.endswith('.backup') and not f.endswith('.part')] + wals = map(str, wals) + file = os.path.join(wals_dir, max(wals)) + os.remove(file) + if self.archive_compress: + file = file[:-3] + + # Single-thread PAGE backup + try: + self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.assertEqual( + 1, 0, + "Expecting Error because of wal segment disappearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Wait for WAL segment' in e.message and + 'to be archived' in e.message and + 'Could not read WAL record at' in e.message and + 'is absent' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 
'ERROR', + self.show_pb(backup_dir, 'node')[1]['status'], + 'Backup {0} should have STATUS "ERROR"') + + # Multi-thread PAGE backup + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='page', + options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of wal segment disappearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Wait for WAL segment' in e.message and + 'to be archived' in e.message and + 'Could not read WAL record at' in e.message and + 'is absent' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node')[2]['status'], + 'Backup {0} should have STATUS "ERROR"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_page_backup_with_corrupted_wal_segment(self): + """ + make node with archiving + make archive backup, then generate some wals with pgbench, + corrupt latest archived wal segment + run page backup, expecting error because of missing wal segment + make sure that backup status is 'ERROR' + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + # make some wals + node.pgbench_init(scale=10) + + # delete last wal segment + wals_dir = os.path.join(backup_dir, 'wal', 'node') + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join( + wals_dir, f)) and not f.endswith('.backup')] + wals = map(str, wals) + # file = os.path.join(wals_dir, max(wals)) + + if self.archive_compress: + original_file = os.path.join(wals_dir, '000000010000000000000004.gz') + tmp_file = os.path.join(backup_dir, '000000010000000000000004') + + with gzip.open(original_file, 'rb') as f_in, open(tmp_file, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + + # drop healthy file + os.remove(original_file) + file = tmp_file + + else: + file = os.path.join(wals_dir, '000000010000000000000004') + + # corrupt file + print(file) + with open(file, "rb+", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + if self.archive_compress: + # compress corrupted file and replace with it old file + with open(file, 'rb') as f_in, gzip.open(original_file, 'wb', compresslevel=1) as f_out: + shutil.copyfileobj(f_in, f_out) + + file = os.path.join(wals_dir, '000000010000000000000004.gz') + + #if self.archive_compress: + # file = file[:-3] + + # Single-thread PAGE backup + try: + self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.assertEqual( + 1, 0, + "Expecting Error because of wal segment disappearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Wait for WAL segment' in e.message and + 'to be archived' in e.message and + 'Could not read WAL record at' in e.message and + 'Possible WAL corruption. 
Error has occured during reading WAL segment' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node')[1]['status'], + 'Backup {0} should have STATUS "ERROR"') + + # Multi-thread PAGE backup + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of wal segment disappearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Wait for WAL segment' in e.message and + 'to be archived' in e.message and + 'Could not read WAL record at' in e.message and + 'Possible WAL corruption. Error has occured during reading WAL segment "{0}"'.format( + file) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node')[2]['status'], + 'Backup {0} should have STATUS "ERROR"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_page_backup_with_alien_wal_segment(self): + """ + make two nodes with archiving + take archive full backup from both nodes, + generate some wals with pgbench on both nodes, + move latest archived wal segment from second node to first node`s archive + run page backup on first node + expecting error because of alien wal segment + make sure that backup status is 'ERROR' + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + alien_node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'alien_node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.add_instance(backup_dir, 'alien_node', alien_node) + self.set_archiving(backup_dir, 'alien_node', alien_node) + alien_node.slow_start() + + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + self.backup_node( + backup_dir, 'alien_node', alien_node, options=['--stream']) + + # make some wals + node.safe_psql( + "postgres", + "create sequence t_seq; " + "create table t_heap as select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1000) i;") + + alien_node.safe_psql( + "postgres", + "create database alien") + + alien_node.safe_psql( + "alien", + "create sequence t_seq; " + "create table t_heap_alien as select i as id, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,100000) i;") + + # copy latest wal segment + wals_dir = os.path.join(backup_dir, 'wal', 'alien_node') + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join( + wals_dir, f)) and not f.endswith('.backup')] + wals = map(str, wals) + filename = max(wals) + file = os.path.join(wals_dir, filename) + file_destination = os.path.join( + os.path.join(backup_dir, 'wal', 'node'), filename) +# file = os.path.join(wals_dir, '000000010000000000000004') + print(file) + print(file_destination) + os.remove(file_destination) + os.rename(file, file_destination) + + # Single-thread PAGE backup + try: + self.backup_node( + 
backup_dir, 'node', node,
+                backup_type='page')
+            self.assertEqual(
+                1, 0,
+                "Expecting Error because of alien wal segment.\n "
+                "Output: {0} \n CMD: {1}".format(
+                    self.output, self.cmd))
+        except ProbackupException as e:
+            self.assertTrue(
+                'INFO: Wait for WAL segment' in e.message and
+                'to be archived' in e.message and
+                'Could not read WAL record at' in e.message and
+                'Possible WAL corruption. Error has occured during reading WAL segment' in e.message,
+                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
+                    repr(e.message), self.cmd))
+
+        self.assertEqual(
+            'ERROR',
+            self.show_pb(backup_dir, 'node')[1]['status'],
+            'Backup {0} should have STATUS "ERROR"')
+
+        # Multi-thread PAGE backup
+        try:
+            self.backup_node(
+                backup_dir, 'node', node,
+                backup_type='page', options=["-j", "4"])
+            self.assertEqual(
+                1, 0,
+                "Expecting Error because of alien wal segment.\n "
+                "Output: {0} \n CMD: {1}".format(
+                    self.output, self.cmd))
+        except ProbackupException as e:
+            self.assertIn('INFO: Wait for WAL segment', e.message)
+            self.assertIn('to be archived', e.message)
+            self.assertIn('Could not read WAL record at', e.message)
+            self.assertIn('WAL file is from different database system: '
+                          'WAL file database system identifier is', e.message)
+            self.assertIn('pg_control database system identifier is', e.message)
+            self.assertIn('Possible WAL corruption. Error has occured '
+                          'during reading WAL segment', e.message)
+
+        self.assertEqual(
+            'ERROR',
+            self.show_pb(backup_dir, 'node')[2]['status'],
+            'Backup {0} should have STATUS "ERROR"')
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)
+
+    # @unittest.skip("skip")
+    def test_multithread_page_backup_with_toast(self):
+        """
+        make node, create toast, do multithread PAGE backup
+        """
+        fname = self.id().split('.')[3]
+        node = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node'),
+            initdb_params=['--data-checksums'])
+
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.slow_start()
+
+        self.backup_node(backup_dir, 'node', node)
+
+        # make some wals
+        node.safe_psql(
+            "postgres",
+            "create table t3 as select i, "
+            "repeat(md5(i::text),5006056) as fat_attr "
+            "from generate_series(0,70) i")
+
+        # Multi-thread PAGE backup
+        self.backup_node(
+            backup_dir, 'node', node,
+            backup_type='page', options=["-j", "4"])
 
         # Clean after yourself
+        self.del_test_dir(module_name, fname)
+
+    # @unittest.skip("skip")
+    def test_page_create_db(self):
+        """
+        Make node, take full backup, create database db1, take page backup,
+        restore database and check its presence
+        """
+        self.maxDiff = None
+        fname = self.id().split('.')[3]
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        node = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node'),
+            set_replication=True,
+            initdb_params=['--data-checksums'],
+            pg_options={
+                'max_wal_size': '10GB',
+                'checkpoint_timeout': '5min',
+                'autovacuum': 'off'
+            }
+        )
+
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.slow_start()
+
+        # FULL BACKUP
+        node.safe_psql(
+            "postgres",
+            "create table t_heap as select i as id, md5(i::text) as text, "
+            "md5(i::text)::tsvector as tsvector from generate_series(0,100) i")
+
+        self.backup_node(
+            backup_dir, 'node', node)
+
+        # CREATE DATABASE DB1
+        node.safe_psql("postgres", "create 
database db1") + node.safe_psql( + "db1", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(i::text)::tsvector as tsvector from generate_series(0,1000) i") + + # PAGE BACKUP + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + # RESTORE + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, + backup_id=backup_id, options=["-j", "4"]) + + # COMPARE PHYSICAL CONTENT + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # START RESTORED NODE + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + + node_restored.safe_psql('db1', 'select 1') + node_restored.cleanup() + + # DROP DATABASE DB1 + node.safe_psql( + "postgres", "drop database db1") + # SECOND PAGE BACKUP + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + # RESTORE SECOND PAGE BACKUP + self.restore_node( + backup_dir, 'node', node_restored, + backup_id=backup_id, options=["-j", "4"] + ) + + # COMPARE PHYSICAL CONTENT + if self.paranoia: + pgdata_restored = self.pgdata_content( + node_restored.data_dir, ignore_ptrack=False) + self.compare_pgdata(pgdata, pgdata_restored) + + # START RESTORED NODE + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + + try: + node_restored.safe_psql('db1', 'select 1') + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because we are connecting to deleted database" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd) + ) + except QueryException as e: + self.assertTrue( + 'FATAL: database "db1" does not exist' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd) + ) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_multi_timeline_page(self): + """ + Check that backup in PAGE mode choose + parent backup correctly: + t12 /---P--> + ... 
+ t3 /----> + t2 /----> + t1 -F-----D-> + + P must have F as parent + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql("postgres", "create extension pageinspect") + + try: + node.safe_psql( + "postgres", + "create extension amcheck") + except QueryException as e: + node.safe_psql( + "postgres", + "create extension amcheck_next") + + node.pgbench_init(scale=20) + full_id = self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + self.backup_node(backup_dir, 'node', node, backup_type='delta') + node.cleanup() + self.restore_node( + backup_dir, 'node', node, backup_id=full_id, + options=[ + '--recovery-target=immediate', + '--recovery-target-action=promote']) + + node.slow_start() + + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # create timelines + for i in range(2, 7): + node.cleanup() + self.restore_node( + backup_dir, 'node', node, + options=[ + '--recovery-target=latest', + '--recovery-target-action=promote', + '--recovery-target-timeline={0}'.format(i)]) + node.slow_start() + + # at this point there is i+1 timeline + pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # create backup at 2, 4 and 6 timeline + if i % 2 == 0: + self.backup_node(backup_dir, 'node', node, backup_type='page') + + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page', + options=['--log-level-file=VERBOSE']) + + pgdata = self.pgdata_content(node.data_dir) + + result = node.safe_psql( + "postgres", "select * from pgbench_accounts") + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node(backup_dir, 'node', node_restored) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + + result_new = node_restored.safe_psql( + "postgres", "select * from pgbench_accounts") + + self.assertEqual(result, result_new) + + self.compare_pgdata(pgdata, pgdata_restored) + + self.checkdb_node( + backup_dir, + 'node', + options=[ + '--amcheck', + '-d', 'postgres', '-p', str(node.port)]) + + self.checkdb_node( + backup_dir, + 'node', + options=[ + '--amcheck', + '-d', 'postgres', '-p', str(node_restored.port)]) + + backup_list = self.show_pb(backup_dir, 'node') + + self.assertEqual( + backup_list[2]['parent-backup-id'], + backup_list[0]['id']) + self.assertEqual(backup_list[2]['current-tli'], 3) + + self.assertEqual( + backup_list[3]['parent-backup-id'], + backup_list[2]['id']) + self.assertEqual(backup_list[3]['current-tli'], 5) + + self.assertEqual( + backup_list[4]['parent-backup-id'], + backup_list[3]['id']) + self.assertEqual(backup_list[4]['current-tli'], 7) + + self.assertEqual( + backup_list[5]['parent-backup-id'], + backup_list[4]['id']) + self.assertEqual(backup_list[5]['current-tli'], 7) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def 
test_multitimeline_page_1(self): + """ + Check that backup in PAGE mode choose + parent backup correctly: + t2 /----> + t1 -F--P---D-> + + P must have F as parent + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off', 'wal_log_hints': 'on'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql("postgres", "create extension pageinspect") + + try: + node.safe_psql( + "postgres", + "create extension amcheck") + except QueryException as e: + node.safe_psql( + "postgres", + "create extension amcheck_next") + + node.pgbench_init(scale=20) + full_id = self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench(options=['-T', '20', '-c', '1']) + pgbench.wait() + + page1 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-T', '10', '-c', '1', '--no-vacuum']) + pgbench.wait() + + page1 = self.backup_node(backup_dir, 'node', node, backup_type='delta') + + node.cleanup() + self.restore_node( + backup_dir, 'node', node, backup_id=page1, + options=[ + '--recovery-target=immediate', + '--recovery-target-action=promote']) + + node.slow_start() + + pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum']) + pgbench.wait() + + print(self.backup_node( + backup_dir, 'node', node, backup_type='page', + options=['--log-level-console=LOG'], return_id=False)) + + pgdata = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node(backup_dir, 'node', node_restored) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + node_restored.slow_start() + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + # @unittest.expectedFailure + def test_page_pg_resetxlog(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'shared_buffers': '512MB', + 'max_wal_size': '3GB'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Create table + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap " + "as select nextval('t_seq')::int as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " +# "from generate_series(0,25600) i") + "from generate_series(0,2560) i") + + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + 'postgres', + "update t_heap set id = nextval('t_seq'), text = md5(text), " + "tsvector = md5(repeat(tsvector::text, 10))::tsvector") + + self.switch_wal_segment(node) + + # kill the bastard + if self.verbose: + print('Killing postmaster. 
Losing Ptrack changes') + node.stop(['-m', 'immediate', '-D', node.data_dir]) + + # now smack it with sledgehammer + if node.major_version >= 10: + pg_resetxlog_path = self.get_bin_path('pg_resetwal') + wal_dir = 'pg_wal' + else: + pg_resetxlog_path = self.get_bin_path('pg_resetxlog') + wal_dir = 'pg_xlog' + + self.run_binary( + [ + pg_resetxlog_path, + '-D', + node.data_dir, + '-o 42', + '-f' + ], + asynchronous=False) + + if not node.status(): + node.slow_start() + else: + print("Die! Die! Why won't you die?... Why won't you die?") + exit(1) + + # take ptrack backup +# self.backup_node( +# backup_dir, 'node', node, +# backup_type='page', options=['--stream']) + + try: + self.backup_node( + backup_dir, 'node', node, backup_type='page') + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because instance was brutalized by pg_resetxlog" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd) + ) + except ProbackupException as e: + self.assertIn( + 'Insert error message', + e.message, + '\n Unexpected Error Message: {0}\n' + ' CMD: {1}'.format(repr(e.message), self.cmd)) + +# pgdata = self.pgdata_content(node.data_dir) +# +# node_restored = self.make_simple_node( +# base_dir=os.path.join(module_name, fname, 'node_restored')) +# node_restored.cleanup() +# +# self.restore_node( +# backup_dir, 'node', node_restored) +# +# pgdata_restored = self.pgdata_content(node_restored.data_dir) +# self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself self.del_test_dir(module_name, fname) diff --git a/tests/pgpro2068.py b/tests/pgpro2068.py new file mode 100644 index 000000000..253be3441 --- /dev/null +++ b/tests/pgpro2068.py @@ -0,0 +1,193 @@ +import os +import unittest +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException, idx_ptrack +from datetime import datetime, timedelta +import subprocess +from time import sleep +import shutil +import signal +from testgres import ProcessType + + +module_name = '2068' + + +class BugTest(ProbackupTest, unittest.TestCase): + + def test_minrecpoint_on_replica(self): + """ + https://jira.postgrespro.ru/browse/PGPRO-2068 + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + # 'checkpoint_timeout': '60min', + 'checkpoint_completion_target': '0.9', + 'bgwriter_delay': '10ms', + 'bgwriter_lru_maxpages': '1000', + 'bgwriter_lru_multiplier': '4.0', + 'max_wal_size': '256MB'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # take full backup and restore it as replica + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # start replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'node', replica, options=['-R']) + self.set_replica(node, replica) + self.add_instance(backup_dir, 'replica', replica) + self.set_archiving(backup_dir, 'replica', replica, replica=True) + + self.set_auto_conf( + replica, + {'port': replica.port, 'restart_after_crash': 'off'}) + + # we need those later + node.safe_psql( + "postgres", + "CREATE EXTENSION plpythonu") + + node.safe_psql( + "postgres", + "CREATE EXTENSION pageinspect") + + 
replica.slow_start(replica=True) + + # generate some data + node.pgbench_init(scale=10) + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "20"]) + pgbench.wait() + pgbench.stdout.close() + + # generate some more data and leave it in background + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-j 4", "-T", "100"]) + + # wait for shared buffer on replica to be filled with dirty data + sleep(20) + + # get pids of replica background workers + startup_pid = replica.auxiliary_pids[ProcessType.Startup][0] + checkpointer_pid = replica.auxiliary_pids[ProcessType.Checkpointer][0] + bgwriter_pid = replica.auxiliary_pids[ProcessType.BackgroundWriter][0] + + # break checkpointer on UpdateLastRemovedPtr + gdb_checkpointer = self.gdb_attach(checkpointer_pid) + gdb_checkpointer._execute('handle SIGINT noprint nostop pass') + gdb_checkpointer._execute('handle SIGUSR1 noprint nostop pass') + gdb_checkpointer.set_breakpoint('UpdateLastRemovedPtr') + gdb_checkpointer.continue_execution_until_break() + + # break recovery on UpdateControlFile + gdb_recovery = self.gdb_attach(startup_pid) + gdb_recovery._execute('handle SIGINT noprint nostop pass') + gdb_recovery._execute('handle SIGUSR1 noprint nostop pass') + gdb_recovery.set_breakpoint('UpdateMinRecoveryPoint') + gdb_recovery.continue_execution_until_break() + gdb_recovery.set_breakpoint('UpdateControlFile') + gdb_recovery.continue_execution_until_break() + + # stop data generation + pgbench.wait() + pgbench.stdout.close() + + # kill someone, we need a crash + os.kill(int(bgwriter_pid), 9) + gdb_recovery._execute('detach') + gdb_checkpointer._execute('detach') + + # just to be sure + try: + replica.stop(['-m', 'immediate', '-D', replica.data_dir]) + except: + pass + + # MinRecLSN = replica.get_control_data()['Minimum recovery ending location'] + + # Promote replica with 'immediate' target action + if self.get_version(replica) >= self.version_to_num('12.0'): + recovery_config = 'postgresql.auto.conf' + else: + recovery_config = 'recovery.conf' + + replica.append_conf( + recovery_config, "recovery_target = 'immediate'") + replica.append_conf( + recovery_config, "recovery_target_action = 'pause'") + replica.slow_start(replica=True) + + if self.get_version(node) < 100000: + script = ''' +DO +$$ +relations = plpy.execute("select class.oid from pg_class class WHERE class.relkind IN ('r', 'i', 't', 'm') and class.relpersistence = 'p'") +current_xlog_lsn = plpy.execute("SELECT min_recovery_end_lsn as lsn FROM pg_control_recovery()")[0]['lsn'] +plpy.notice('CURRENT LSN: {0}'.format(current_xlog_lsn)) +found_corruption = False +for relation in relations: + pages_from_future = plpy.execute("with number_of_blocks as (select blknum from generate_series(0, pg_relation_size({0}) / 8192 -1) as blknum) select blknum, lsn, checksum, flags, lower, upper, special, pagesize, version, prune_xid from number_of_blocks, page_header(get_raw_page('{0}'::oid::regclass::text, number_of_blocks.blknum::int)) where lsn > '{1}'::pg_lsn".format(relation['oid'], current_xlog_lsn)) + + if pages_from_future.nrows() == 0: + continue + + for page in pages_from_future: + plpy.notice('Found page from future. 
OID: {0}, BLKNUM: {1}, LSN: {2}'.format(relation['oid'], page['blknum'], page['lsn'])) + found_corruption = True +if found_corruption: + plpy.error('Found Corruption') +$$ LANGUAGE plpythonu; +''' + else: + script = ''' +DO +$$ +relations = plpy.execute("select class.oid from pg_class class WHERE class.relkind IN ('r', 'i', 't', 'm') and class.relpersistence = 'p'") +current_xlog_lsn = plpy.execute("select pg_last_wal_replay_lsn() as lsn")[0]['lsn'] +plpy.notice('CURRENT LSN: {0}'.format(current_xlog_lsn)) +found_corruption = False +for relation in relations: + pages_from_future = plpy.execute("with number_of_blocks as (select blknum from generate_series(0, pg_relation_size({0}) / 8192 -1) as blknum) select blknum, lsn, checksum, flags, lower, upper, special, pagesize, version, prune_xid from number_of_blocks, page_header(get_raw_page('{0}'::oid::regclass::text, number_of_blocks.blknum::int)) where lsn > '{1}'::pg_lsn".format(relation['oid'], current_xlog_lsn)) + + if pages_from_future.nrows() == 0: + continue + + for page in pages_from_future: + plpy.notice('Found page from future. OID: {0}, BLKNUM: {1}, LSN: {2}'.format(relation['oid'], page['blknum'], page['lsn'])) + found_corruption = True +if found_corruption: + plpy.error('Found Corruption') +$$ LANGUAGE plpythonu; +''' + + # Find blocks from future + replica.safe_psql( + 'postgres', + script) + + # error is expected if version < 10.6 + # gdb_backup.continue_execution_until_exit() + + # do basebackup + + # do pg_probackup, expect error + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/pgpro560.py b/tests/pgpro560.py index bf3345561..53c7914a2 100644 --- a/tests/pgpro560.py +++ b/tests/pgpro560.py @@ -1,8 +1,9 @@ import os import unittest -from .helpers.ptrack_helpers import ProbackupTest, ProbackupException, idx_ptrack +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException from datetime import datetime, timedelta import subprocess +from time import sleep module_name = 'pgpro560' @@ -20,29 +21,32 @@ def test_pgpro560_control_file_loss(self): check that backup failed """ fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) + initdb_params=['--data-checksums']) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() - file = os.path.join(node.base_dir,'data', 'global', 'pg_control') + file = os.path.join(node.base_dir, 'data', 'global', 'pg_control') os.remove(file) try: self.backup_node(backup_dir, 'node', node, options=['--stream']) # we should die here because exception is what we expect to happen - self.assertEqual(1, 0, "Expecting Error because pg_control was deleted.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) + self.assertEqual( + 1, 0, + "Expecting Error because pg_control was deleted.\n " + "Output: {0} \n CMD: {1}".format(repr(self.output), self.cmd)) except ProbackupException as e: self.assertTrue( - 'ERROR: could not open file' in e.message - and 'pg_control' in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + 'ERROR: Could not open file' in e.message and + 'pg_control' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + 
repr(e.message), self.cmd))
 
         # Clean after yourself
         self.del_test_dir(module_name, fname)
@@ -55,18 +59,18 @@ def test_pgpro560_systemid_mismatch(self):
         check that backup failed
         """
         fname = self.id().split('.')[3]
-        node1 = self.make_simple_node(base_dir="{0}/{1}/node1".format(module_name, fname),
+        node1 = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node1'),
             set_replication=True,
-            initdb_params=['--data-checksums'],
-            pg_options={'wal_level': 'replica'}
-            )
-        node1.start()
-        node2 = self.make_simple_node(base_dir="{0}/{1}/node2".format(module_name, fname),
+            initdb_params=['--data-checksums'])
+
+        node1.slow_start()
+        node2 = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node2'),
             set_replication=True,
-            initdb_params=['--data-checksums'],
-            pg_options={'wal_level': 'replica'}
-            )
-        node2.start()
+            initdb_params=['--data-checksums'])
+
+        node2.slow_start()
 
         backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
         self.init_pb(backup_dir)
@@ -75,24 +79,52 @@ def test_pgpro560_systemid_mismatch(self):
         try:
             self.backup_node(backup_dir, 'node1', node2, options=['--stream'])
             # we should die here because exception is what we expect to happen
-            self.assertEqual(1, 0, "Expecting Error because of SYSTEM ID mismatch.\n Output: {0} \n CMD: {1}".format(
-                repr(self.output), self.cmd))
+            self.assertEqual(
+                1, 0,
+                "Expecting Error because of SYSTEM ID mismatch.\n "
+                "Output: {0} \n CMD: {1}".format(repr(self.output), self.cmd))
         except ProbackupException as e:
-            self.assertTrue(
-                'ERROR: Backup data directory was initialized for system id' in e.message
-                and 'but connected instance system id is' in e.message,
-                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
+            if self.get_version(node1) > 90600:
+                self.assertTrue(
+                    'ERROR: Backup data directory was '
+                    'initialized for system id' in e.message and
+                    'but connected instance system id is' in e.message,
+                    '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
+                        repr(e.message), self.cmd))
+            else:
+                self.assertIn(
+                    'ERROR: System identifier mismatch. '
+                    'Connected PostgreSQL instance has system id',
+                    e.message,
+                    '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
+                        repr(e.message), self.cmd))
+
+        sleep(1)
 
         try:
-            self.backup_node(backup_dir, 'node1', node2, data_dir=node1.data_dir, options=['--stream'])
+            self.backup_node(
+                backup_dir, 'node1', node2,
+                data_dir=node1.data_dir, options=['--stream'])
             # we should die here because exception is what we expect to happen
-            self.assertEqual(1, 0, "Expecting Error because of of SYSTEM ID mismatch.\n Output: {0} \n CMD: {1}".format(
-                repr(self.output), self.cmd))
+            self.assertEqual(
+                1, 0,
+                "Expecting Error because of SYSTEM ID mismatch.\n "
+                "Output: {0} \n CMD: {1}".format(repr(self.output), self.cmd))
         except ProbackupException as e:
-            self.assertTrue(
-                'ERROR: Backup data directory was initialized for system id' in e.message
-                and 'but connected instance system id is' in e.message,
-                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
+            if self.get_version(node1) > 90600:
+                self.assertTrue(
+                    'ERROR: Backup data directory was initialized '
+                    'for system id' in e.message and
+                    'but connected instance system id is' in e.message,
+                    '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
+                        repr(e.message), self.cmd))
+            else:
+                self.assertIn(
+                    'ERROR: System identifier mismatch. 
' + 'Connected PostgreSQL instance has system id', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) # Clean after yourself self.del_test_dir(module_name, fname) diff --git a/tests/pgpro589.py b/tests/pgpro589.py index bd40f16de..d6381a8b5 100644 --- a/tests/pgpro589.py +++ b/tests/pgpro589.py @@ -19,18 +19,17 @@ def test_pgpro589(self): """ fname = self.id().split('.')[3] node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - # make erroneus archive_command - node.append_conf("postgresql.auto.conf", "archive_command = 'exit 0'") - node.start() + # make erroneous archive_command + self.set_auto_conf(node, {'archive_command': 'exit 0'}) + node.slow_start() node.pgbench_init(scale=5) pgbench = node.pgbench( @@ -58,8 +57,8 @@ def test_pgpro589(self): except ProbackupException as e: self.assertTrue( 'INFO: Wait for WAL segment' in e.message and - 'ERROR: Switched WAL segment' in e.message and - 'could not be archived' in e.message, + 'ERROR: WAL segment' in e.message and + 'could not be archived in 10 seconds' in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(e.message), self.cmd)) diff --git a/tests/ptrack.py b/tests/ptrack.py index c2d6abff3..a709afb74 100644 --- a/tests/ptrack.py +++ b/tests/ptrack.py @@ -1,12 +1,13 @@ import os import unittest -from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException, idx_ptrack from datetime import datetime, timedelta import subprocess -from testgres import QueryException +from testgres import QueryException, StartNodeException import shutil import sys -import time +from time import sleep +from threading import Thread module_name = 'ptrack' @@ -14,6 +15,250 @@ class PtrackTest(ProbackupTest, unittest.TestCase): + # @unittest.skip("skip") + def test_ptrack_simple(self): + """make node, make full and ptrack stream backups," + " restore them and check data correctness""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + node.safe_psql( + "postgres", + "create table t_heap as select i" + " as id from generate_series(0,1) i") + + self.backup_node( + backup_dir, 'node', node, backup_type='ptrack', + options=['--stream']) + + node.safe_psql( + "postgres", + "update t_heap set id = 100500") + + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['--stream']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + result = node.safe_psql("postgres", "SELECT * FROM t_heap") + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 
'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, options=["-j", "4"]) + + # Physical comparison + if self.paranoia: + pgdata_restored = self.pgdata_content( + node_restored.data_dir, ignore_ptrack=False) + self.compare_pgdata(pgdata, pgdata_restored) + + self.set_auto_conf( + node_restored, {'port': node_restored.port}) + + node_restored.slow_start() + + # Logical comparison + self.assertEqual( + result, + node_restored.safe_psql("postgres", "SELECT * FROM t_heap")) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_ptrack_unprivileged(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + # self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "CREATE DATABASE backupdb") + + # PG 9.5 + if self.get_version(node) < 90600: + node.safe_psql( + 'backupdb', + "REVOKE ALL ON DATABASE backupdb from PUBLIC; " + "REVOKE ALL ON SCHEMA public from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON SCHEMA information_schema from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; " + "CREATE ROLE backup WITH LOGIN REPLICATION; " + "GRANT CONNECT ON DATABASE backupdb to backup; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; " # for partial restore, checkdb and ptrack + "GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.textout(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.timestamptz(timestamp with time zone, integer) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;" + ) + # PG 9.6 + elif self.get_version(node) > 90600 and self.get_version(node) < 100000: + node.safe_psql( + 'backupdb', + "REVOKE ALL ON DATABASE backupdb from PUBLIC; " + "REVOKE ALL ON SCHEMA public from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA 
pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON SCHEMA information_schema from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; " + "CREATE ROLE backup WITH LOGIN REPLICATION; " + "GRANT CONNECT ON DATABASE backupdb to backup; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; " # for partial restore, checkdb and ptrack + "GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.textout(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.timestamptz(timestamp with time zone, integer) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_system() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_xlog() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_xlog_replay_location() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;" + ) + # >= 10 + else: + node.safe_psql( + 'backupdb', + "REVOKE ALL ON DATABASE backupdb from PUBLIC; " + "REVOKE ALL ON SCHEMA public from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON SCHEMA information_schema from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; " + "CREATE ROLE backup WITH LOGIN REPLICATION; " + "GRANT CONNECT ON DATABASE backupdb to backup; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; " # for partial restore, checkdb and ptrack + "GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_system() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; " + "GRANT EXECUTE ON FUNCTION 
pg_catalog.pg_switch_wal() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_wal_replay_lsn() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;" + ) + + if node.major_version < 12: + fnames = [ + 'pg_catalog.oideq(oid, oid)', + 'pg_catalog.ptrack_version()', + 'pg_catalog.pg_ptrack_clear()', + 'pg_catalog.pg_ptrack_control_lsn()', + 'pg_catalog.pg_ptrack_get_and_clear_db(oid, oid)', + 'pg_catalog.pg_ptrack_get_and_clear(oid, oid)', + 'pg_catalog.pg_ptrack_get_block_2(oid, oid, oid, bigint)' + ] + + for fname in fnames: + node.safe_psql( + "backupdb", + "GRANT EXECUTE ON FUNCTION {0} TO backup".format(fname)) + + else: + node.safe_psql( + "backupdb", + "CREATE SCHEMA ptrack") + node.safe_psql( + "backupdb", + "CREATE EXTENSION ptrack WITH SCHEMA ptrack") + node.safe_psql( + "backupdb", + "GRANT USAGE ON SCHEMA ptrack TO backup") + + node.safe_psql( + "backupdb", + "GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup") + + if ProbackupTest.enterprise: + node.safe_psql( + "backupdb", + "GRANT EXECUTE ON FUNCTION pg_catalog.pgpro_edition() TO backup") + + node.safe_psql( + "backupdb", + "GRANT EXECUTE ON FUNCTION pg_catalog.pgpro_version() TO backup") + + self.backup_node( + backup_dir, 'node', node, + datname='backupdb', options=['--stream', "-U", "backup"]) + + self.backup_node( + backup_dir, 'node', node, datname='backupdb', + backup_type='ptrack', options=['--stream', "-U", "backup"]) + + # @unittest.skip("skip") # @unittest.expectedFailure def test_ptrack_enable(self): @@ -21,18 +266,20 @@ def test_ptrack_enable(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s' - } - ) + 'checkpoint_timeout': '30s', + 'shared_preload_libraries': 'ptrack'}) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") # PTRACK BACKUP try: @@ -70,30 +317,40 @@ def test_ptrack_disable(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on' - } - ) + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={'checkpoint_timeout': '30s'}) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") # FULL BACKUP self.backup_node(backup_dir, 'node', node, options=['--stream']) # DISABLE PTRACK - node.safe_psql('postgres', "alter system set ptrack_enable to off") - node.restart() + if node.major_version >= 12: + node.safe_psql('postgres', "alter system set 
ptrack.map_size to 0") + else: + node.safe_psql('postgres', "alter system set ptrack_enable to off") + node.stop() + node.slow_start() # ENABLE PTRACK - node.safe_psql('postgres', "alter system set ptrack_enable to on") - node.restart() + if node.major_version >= 12: + node.safe_psql('postgres', "alter system set ptrack.map_size to '128'") + node.safe_psql('postgres', "alter system set shared_preload_libraries to 'ptrack'") + else: + node.safe_psql('postgres', "alter system set ptrack_enable to on") + node.stop() + node.slow_start() # PTRACK BACKUP try: @@ -123,62 +380,62 @@ def test_ptrack_disable(self): self.del_test_dir(module_name, fname) # @unittest.skip("skip") - def test_ptrack_uncommited_xact(self): - """make ptrack backup while there is uncommited open transaction""" + def test_ptrack_uncommitted_xact(self): + """make ptrack backup while there is uncommitted open transaction""" fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', - 'ptrack_enable': 'on' - } - ) - node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname), - ) + 'autovacuum': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node_restored.cleanup() - node.start() + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'node', node, options=['--stream']) - self.backup_node(backup_dir, 'node', node) con = node.connect("postgres") con.execute( "create table t_heap as select i" - " as id from generate_series(0,1) i" - ) + " as id from generate_series(0,1) i") self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] - ) - pgdata = self.pgdata_content(node.data_dir) + options=['--stream']) - self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] - ) + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() self.restore_node( - backup_dir, 'node', node_restored, options=["-j", "4"]) + backup_dir, 'node', node_restored, + node_restored.data_dir, options=["-j", "4"]) - # Physical comparison if self.paranoia: pgdata_restored = self.pgdata_content( - node_restored.data_dir, ignore_ptrack=False) - self.compare_pgdata(pgdata, pgdata_restored) + node_restored.data_dir, ignore_ptrack=False) + + self.set_auto_conf( + node_restored, {'port': node_restored.port}) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) + node_restored.slow_start() - node_restored.start() + # Physical comparison + if self.paranoia: + self.compare_pgdata(pgdata, pgdata_restored) # Clean after yourself self.del_test_dir(module_name, fname) @@ -190,28 +447,23 @@ def test_ptrack_vacuum_full(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + 
base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', - 'ptrack_enable': 'on' - } - ) - node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname), - ) + ptrack_enable=True, + initdb_params=['--data-checksums']) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node_restored.cleanup() - node.start() + node.slow_start() + self.create_tblspace_in_node(node, 'somedata') - self.backup_node(backup_dir, 'node', node) + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'node', node, options=['--stream']) node.safe_psql( "postgres", @@ -219,44 +471,39 @@ def test_ptrack_vacuum_full(self): " as id from generate_series(0,1000000) i" ) - # create async connection - conn = self.get_async_connect(port=node.port) + pg_connect = node.connect("postgres", autocommit=True) - self.wait(conn) - - acurs = conn.cursor() - acurs.execute("select pg_backend_pid()") - - self.wait(conn) - pid = acurs.fetchall()[0][0] - print(pid) - - gdb = self.gdb_attach(pid) + gdb = self.gdb_attach(pg_connect.pid) gdb.set_breakpoint('reform_and_rewrite_tuple') - if not gdb.continue_execution_until_running(): - print('Failed gdb continue') - exit(1) + gdb.continue_execution_until_running() - acurs.execute("VACUUM FULL t_heap") + process = Thread( + target=pg_connect.execute, args=["VACUUM FULL t_heap"]) + process.start() - if gdb.stopped_in_breakpoint(): - if gdb.continue_execution_until_break(20) != 'breakpoint-hit': - print('Failed to hit breakpoint') - exit(1) + while not gdb.stopped_in_breakpoint: + sleep(1) + + gdb.continue_execution_until_break(20) self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack', options=['--stream']) self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack', options=['--stream']) + if self.paranoia: pgdata = self.pgdata_content(node.data_dir) + gdb.remove_all_breakpoints() + gdb._execute('detach') + process.join() + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + old_tablespace = self.get_tblspace_path(node, 'somedata') new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new') @@ -272,10 +519,10 @@ def test_ptrack_vacuum_full(self): node_restored.data_dir, ignore_ptrack=False) self.compare_pgdata(pgdata, pgdata_restored) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) + self.set_auto_conf( + node_restored, {'port': node_restored.port}) - node_restored.start() + node_restored.slow_start() # Clean after yourself self.del_test_dir(module_name, fname) @@ -289,65 +536,58 @@ def test_ptrack_vacuum_truncate(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', - 
'ptrack_enable': 'on', - 'autovacuum': 'off' - } - ) - node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname), - ) + pg_options={'autovacuum': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node_restored.cleanup() - node.start() + node.slow_start() + self.create_tblspace_in_node(node, 'somedata') + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + node.safe_psql( "postgres", "create sequence t_seq; " "create table t_heap tablespace somedata as select i as id, " "md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1024) i;" - ) + "from generate_series(0,1024) i;") + node.safe_psql( "postgres", - "vacuum t_heap" - ) + "vacuum t_heap") - self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, options=['--stream']) node.safe_psql( "postgres", - "delete from t_heap where ctid >= '(11,0)'" - ) + "delete from t_heap where ctid >= '(11,0)'") + node.safe_psql( "postgres", - "vacuum t_heap" - ) + "vacuum t_heap") self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack', options=['--stream']) self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack', options=['--stream']) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + old_tablespace = self.get_tblspace_path(node, 'somedata') new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new') @@ -365,183 +605,110 @@ def test_ptrack_vacuum_truncate(self): ) self.compare_pgdata(pgdata, pgdata_restored) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() + self.set_auto_conf( + node_restored, {'port': node_restored.port}) + + node_restored.slow_start() # Clean after yourself self.del_test_dir(module_name, fname) # @unittest.skip("skip") - def test_ptrack_simple(self): + def test_ptrack_get_block(self): """make node, make full and ptrack stream backups," " restore them and check data correctness""" fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', - 'ptrack_enable': 'on' - } - ) - node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname), - ) + ptrack_enable=True, + initdb_params=['--data-checksums']) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node_restored.cleanup() - node.start() + node.slow_start() - self.backup_node(backup_dir, 'node', node) + if node.major_version >= 12: + self.skipTest("skip --- we do not need ptrack_get_block for ptrack 2.*") + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") node.safe_psql( "postgres", "create table t_heap as select i" - " as id from generate_series(0,1) i" - ) + " as id from 
generate_series(0,1) i") - self.backup_node( + self.backup_node(backup_dir, 'node', node, options=['--stream']) + gdb = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] - ) + options=['--stream'], + gdb=True) + + if node.major_version > 11: + gdb.set_breakpoint('make_pagemap_from_ptrack_2') + else: + gdb.set_breakpoint('make_pagemap_from_ptrack_1') + gdb.run_until_break() node.safe_psql( "postgres", "update t_heap set id = 100500") + gdb.continue_execution_until_exit() + self.backup_node( backup_dir, 'node', node, - backup_type='ptrack', options=['--stream'] - ) + backup_type='ptrack', options=['--stream']) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) result = node.safe_psql("postgres", "SELECT * FROM t_heap") - - self.restore_node( - backup_dir, 'node', node_restored, options=["-j", "4"]) + node.cleanup() + self.restore_node(backup_dir, 'node', node, options=["-j", "4"]) # Physical comparison if self.paranoia: pgdata_restored = self.pgdata_content( - node_restored.data_dir, ignore_ptrack=False) + node.data_dir, ignore_ptrack=False) self.compare_pgdata(pgdata, pgdata_restored) - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() - + node.slow_start() # Logical comparison self.assertEqual( result, - node_restored.safe_psql("postgres", "SELECT * FROM t_heap") + node.safe_psql("postgres", "SELECT * FROM t_heap") ) # Clean after yourself self.del_test_dir(module_name, fname) # @unittest.skip("skip") - def test_ptrack_get_block(self): - """make node, make full and ptrack stream backups," - " restore them and check data correctness""" + def test_ptrack_stream(self): + """make node, make full and ptrack stream backups, + restore them and check data correctness""" + self.maxDiff = None fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '300s', - 'ptrack_enable': 'on' - } - ) + 'checkpoint_timeout': '30s', + 'autovacuum': 'off'}) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.safe_psql( - "postgres", - "create table t_heap as select i" - " as id from generate_series(0,1) i" - ) - - self.backup_node(backup_dir, 'node', node, options=['--stream']) - gdb = self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'], - gdb=True - ) - - gdb.set_breakpoint('make_pagemap_from_ptrack') - gdb.run_until_break() - - node.safe_psql( - "postgres", - "update t_heap set id = 100500") - - gdb.continue_execution_until_exit() - - self.backup_node( - backup_dir, 'node', node, - backup_type='ptrack', options=['--stream'] - ) - if self.paranoia: - pgdata = self.pgdata_content(node.data_dir) - - result = node.safe_psql("postgres", "SELECT * FROM t_heap") - node.cleanup() - self.restore_node(backup_dir, 'node', node, options=["-j", "4"]) - - # Physical comparison - if self.paranoia: - pgdata_restored = self.pgdata_content( - node.data_dir, ignore_ptrack=False) - self.compare_pgdata(pgdata, pgdata_restored) - - node.start() - # Logical comparison - self.assertEqual( - 
result, - node.safe_psql("postgres", "SELECT * FROM t_heap") - ) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_ptrack_stream(self): - """make node, make full and ptrack stream backups, - restore them and check data correctness""" - self.maxDiff = None - fname = self.id().split('.')[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on', - 'autovacuum': 'off' - } - ) + node.slow_start() - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") # FULL BACKUP node.safe_psql("postgres", "create sequence t_seq") @@ -549,8 +716,8 @@ def test_ptrack_stream(self): "postgres", "create table t_heap as select i as id, nextval('t_seq')" " as t_seq, md5(i::text) as text, md5(i::text)::tsvector" - " as tsvector from generate_series(0,100) i" - ) + " as tsvector from generate_series(0,100) i") + full_result = node.safe_psql("postgres", "SELECT * FROM t_heap") full_backup_id = self.backup_node( backup_dir, 'node', node, options=['--stream']) @@ -560,14 +727,12 @@ def test_ptrack_stream(self): "postgres", "insert into t_heap select i as id, nextval('t_seq') as t_seq," " md5(i::text) as text, md5(i::text)::tsvector as tsvector" - " from generate_series(100,200) i" - ) + " from generate_series(100,200) i") + ptrack_result = node.safe_psql("postgres", "SELECT * FROM t_heap") ptrack_backup_id = self.backup_node( - backup_dir, 'node', - node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] - ) + backup_dir, 'node', node, + backup_type='ptrack', options=['--stream']) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -580,8 +745,7 @@ def test_ptrack_stream(self): "INFO: Restore of backup {0} completed.".format(full_backup_id), self.restore_node( backup_dir, 'node', node, - backup_id=full_backup_id, - options=["-j", "4", "--recovery-target-action=promote"] + backup_id=full_backup_id, options=["-j", "4"] ), '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(self.output), self.cmd) @@ -596,12 +760,10 @@ def test_ptrack_stream(self): "INFO: Restore of backup {0} completed.".format(ptrack_backup_id), self.restore_node( backup_dir, 'node', node, - backup_id=ptrack_backup_id, - options=["-j", "4", "--recovery-target-action=promote"] + backup_id=ptrack_backup_id, options=["-j", "4"] ), '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd) - ) + repr(self.output), self.cmd)) if self.paranoia: pgdata_restored = self.pgdata_content( @@ -623,21 +785,23 @@ def test_ptrack_archive(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, initdb_params=['--data-checksums'], + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on', - 'autovacuum': 'off' - } - ) + 'autovacuum': 'off'}) self.init_pb(backup_dir) 
self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") # FULL BACKUP node.safe_psql( @@ -646,8 +810,8 @@ def test_ptrack_archive(self): " select i as id," " md5(i::text) as text," " md5(i::text)::tsvector as tsvector" - " from generate_series(0,100) i" - ) + " from generate_series(0,100) i") + full_result = node.safe_psql("postgres", "SELECT * FROM t_heap") full_backup_id = self.backup_node(backup_dir, 'node', node) full_target_time = self.show_pb( @@ -659,8 +823,8 @@ def test_ptrack_archive(self): "insert into t_heap select i as id," " md5(i::text) as text," " md5(i::text)::tsvector as tsvector" - " from generate_series(100,200) i" - ) + " from generate_series(100,200) i") + ptrack_result = node.safe_psql("postgres", "SELECT * FROM t_heap") ptrack_backup_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack') @@ -724,26 +888,24 @@ def test_ptrack_archive(self): def test_ptrack_pgpro417(self): """Make node, take full backup, take ptrack backup, delete ptrack backup. Try to take ptrack backup, - which should fail""" - self.maxDiff = None + which should fail. Actual only for PTRACK 1.x""" + if self.pg_config_version > self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL =< 11 for this test') + fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': - 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on', - 'autovacuum': 'off'} - ) + 'autovacuum': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -807,27 +969,27 @@ def test_ptrack_pgpro417(self): def test_page_pgpro417(self): """ Make archive node, take full backup, take page backup, - delete page backup. Try to take ptrack backup, which should fail + delete page backup. Try to take ptrack backup, which should fail. 
+ Actual only for PTRACK 1.x """ - self.maxDiff = None + if self.pg_config_version > self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL =< 11 for this test') + fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on', - 'autovacuum': 'off'} - ) + 'autovacuum': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -835,7 +997,6 @@ def test_page_pgpro417(self): "create table t_heap as select i as id, md5(i::text) as text, " "md5(i::text)::tsvector as tsvector from generate_series(0,100) i") node.safe_psql("postgres", "SELECT * FROM t_heap") - self.backup_node(backup_dir, 'node', node) # PAGE BACKUP node.safe_psql( @@ -879,26 +1040,26 @@ def test_page_pgpro417(self): def test_full_pgpro417(self): """ Make node, take two full backups, delete full second backup. - Try to take ptrack backup, which should fail + Try to take ptrack backup, which should fail. + Relevant only for PTRACK 1.x """ - self.maxDiff = None + if self.pg_config_version > self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL =< 11 for this test') + fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on', 'autovacuum': 'off' - } - ) + 'autovacuum': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() # FULL BACKUP node.safe_psql( @@ -958,27 +1119,25 @@ def test_create_db(self): Make node, take full backup, create database db1, take ptrack backup, restore database and check it presense """ - self.maxDiff = None fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', 'max_wal_size': '10GB', - 'max_wal_senders': '2', - 'checkpoint_timeout': '5min', - 'ptrack_enable': 'on', - 'autovacuum': 'off' - } - ) + 'autovacuum': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") # FULL BACKUP node.safe_psql( @@ -989,7 +1148,7 @@ def test_create_db(self): node.safe_psql("postgres", "SELECT * FROM t_heap") self.backup_node( backup_dir, 'node', node, - options=["--stream", "--log-level-file=verbose"]) + options=["--stream"]) # CREATE DATABASE DB1 node.safe_psql("postgres", "create database db1") @@ -1001,17 +1160,14 @@ def test_create_db(self): # PTRACK BACKUP 
backup_id = self.backup_node( backup_dir, 'node', node, - backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"] - ) + backup_type='ptrack', options=["--stream"]) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) # RESTORE node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname) - ) + base_dir=os.path.join(module_name, fname, 'node_restored')) node_restored.cleanup() self.restore_node( @@ -1025,9 +1181,9 @@ def test_create_db(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() + self.set_auto_conf( + node_restored, {'port': node_restored.port}) + node_restored.slow_start() # DROP DATABASE DB1 node.safe_psql( @@ -1045,8 +1201,7 @@ def test_create_db(self): node_restored.cleanup() self.restore_node( backup_dir, 'node', node_restored, - backup_id=backup_id, options=["-j", "4"] - ) + backup_id=backup_id, options=["-j", "4"]) # COMPARE PHYSICAL CONTENT if self.paranoia: @@ -1055,9 +1210,9 @@ def test_create_db(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - "postgresql.auto.conf", "port = {0}".format(node_restored.port)) - node_restored.start() + self.set_auto_conf( + node_restored, {'port': node_restored.port}) + node_restored.slow_start() try: node_restored.safe_psql('db1', 'select 1') @@ -1066,15 +1221,117 @@ def test_create_db(self): 1, 0, "Expecting Error because we are connecting to deleted database" "\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd) - ) + repr(self.output), self.cmd)) except QueryException as e: self.assertTrue( 'FATAL: database "db1" does not exist' in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd) + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_create_db_on_replica(self): + """ + Make node, take full backup, create replica from it, + take full backup from replica, + create database db1, take ptrack backup from replica, + restore database and check its presence + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '30s', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + # FULL BACKUP + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(i::text)::tsvector as tsvector from generate_series(0,100) i") + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.backup_node( + backup_dir, 'node', node, options=['-j10', '--stream']) + + self.restore_node(backup_dir, 'node', replica) + + # Add replica + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(node, replica, 'replica', synchronous=True) + replica.slow_start(replica=True) + + self.backup_node( + backup_dir, 'replica', replica, + options=[ + '-j10', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(node.port), + 
'--stream' + ] ) + # CREATE DATABASE DB1 + node.safe_psql("postgres", "create database db1") + node.safe_psql( + "db1", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(i::text)::tsvector as tsvector from generate_series(0,100) i") + + # Wait until replica catch up with master + self.wait_until_replica_catch_with_master(node, replica) + replica.safe_psql('postgres', 'checkpoint') + + # PTRACK BACKUP + backup_id = self.backup_node( + backup_dir, 'replica', + replica, backup_type='ptrack', + options=[ + '-j10', + '--stream', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(node.port) + ] + ) + + if self.paranoia: + pgdata = self.pgdata_content(replica.data_dir) + + # RESTORE + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'replica', node_restored, + backup_id=backup_id, options=["-j", "4"]) + + # COMPARE PHYSICAL CONTENT + if self.paranoia: + pgdata_restored = self.pgdata_content( + node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + # Clean after yourself self.del_test_dir(module_name, fname) @@ -1085,20 +1342,22 @@ def test_alter_table_set_tablespace_ptrack(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, initdb_params=['--data-checksums'], + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on', - 'autovacuum': 'off' - } - ) + 'autovacuum': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") # FULL BACKUP self.create_tblspace_in_node(node, 'somedata') @@ -1106,8 +1365,7 @@ def test_alter_table_set_tablespace_ptrack(self): "postgres", "create table t_heap tablespace somedata as select i as id," " md5(i::text) as text, md5(i::text)::tsvector as tsvector" - " from generate_series(0,100) i" - ) + " from generate_series(0,100) i") # FULL backup self.backup_node(backup_dir, 'node', node, options=["--stream"]) @@ -1115,17 +1373,16 @@ def test_alter_table_set_tablespace_ptrack(self): self.create_tblspace_in_node(node, 'somedata_new') node.safe_psql( "postgres", - "alter table t_heap set tablespace somedata_new" - ) + "alter table t_heap set tablespace somedata_new") # sys.exit(1) # PTRACK BACKUP - result = node.safe_psql( - "postgres", "select * from t_heap") + #result = node.safe_psql( + # "postgres", "select * from t_heap") self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"] + options=["--stream"] ) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -1134,8 +1391,7 @@ def test_alter_table_set_tablespace_ptrack(self): # RESTORE node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname) - ) + base_dir=os.path.join(module_name, fname, 'node_restored')) node_restored.cleanup() self.restore_node( @@ -1149,8 +1405,7 @@ def test_alter_table_set_tablespace_ptrack(self): "-T", "{0}={1}".format( self.get_tblspace_path(node, 'somedata_new'), self.get_tblspace_path(node_restored, 
'somedata_new') - ), - "--recovery-target-action=promote" + ) ] ) @@ -1161,14 +1416,14 @@ def test_alter_table_set_tablespace_ptrack(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node_restored.port)) + self.set_auto_conf( + node_restored, {'port': node_restored.port}) node_restored.slow_start() - result_new = node_restored.safe_psql( - "postgres", "select * from t_heap") - - self.assertEqual(result, result_new, 'lost some data after restore') +# result_new = node_restored.safe_psql( +# "postgres", "select * from t_heap") +# +# self.assertEqual(result, result_new, 'lost some data after restore') # Clean after yourself self.del_test_dir(module_name, fname) @@ -1178,24 +1433,25 @@ def test_alter_database_set_tablespace_ptrack(self): """Make node, create tablespace with database," " take full backup, alter tablespace location," " take ptrack backup, restore database.""" - self.maxDiff = None fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on', - 'autovacuum': 'off'} - ) + 'autovacuum': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") # FULL BACKUP self.backup_node(backup_dir, 'node', node, options=["--stream"]) @@ -1211,7 +1467,7 @@ def test_alter_database_set_tablespace_ptrack(self): # PTRACK BACKUP self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", '--log-level-file=verbose']) + options=["--stream"]) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -1219,7 +1475,7 @@ def test_alter_database_set_tablespace_ptrack(self): # RESTORE node_restored = self.make_simple_node( - base_dir="{0}/{1}/node_restored".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'node_restored')) node_restored.cleanup() self.restore_node( backup_dir, 'node', @@ -1237,7 +1493,8 @@ def test_alter_database_set_tablespace_ptrack(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - node_restored.start() + node_restored.port = node.port + node_restored.slow_start() # Clean after yourself self.del_test_dir(module_name, fname) @@ -1248,24 +1505,25 @@ def test_drop_tablespace(self): Make node, create table, alter table tablespace, take ptrack backup, move table from tablespace, take ptrack backup """ - self.maxDiff = None fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', 'checkpoint_timeout': '30s', - 'ptrack_enable': 'on', - 'autovacuum': 'off'} - ) + 'autovacuum': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") 
self.create_tblspace_in_node(node, 'somedata') @@ -1303,11 +1561,20 @@ def test_drop_tablespace(self): backup_dir, 'node', node, backup_type='ptrack', options=["--stream"]) + if self.paranoia: + pgdata = self.pgdata_content( + node.data_dir, ignore_ptrack=True) + tblspace = self.get_tblspace_path(node, 'somedata') node.cleanup() shutil.rmtree(tblspace, ignore_errors=True) self.restore_node(backup_dir, 'node', node, options=["-j", "4"]) - node.start() + + if self.paranoia: + pgdata_restored = self.pgdata_content( + node.data_dir, ignore_ptrack=True) + + node.slow_start() tblspc_exist = node.safe_psql( "postgres", @@ -1323,6 +1590,9 @@ def test_drop_tablespace(self): result_new = node.safe_psql("postgres", "select * from t_heap") self.assertEqual(result, result_new) + if self.paranoia: + self.compare_pgdata(pgdata, pgdata_restored) + # Clean after yourself self.del_test_dir(module_name, fname) @@ -1332,22 +1602,25 @@ def test_ptrack_alter_tablespace(self): Make node, create table, alter table tablespace, take ptrack backup, move table from tablespace, take ptrack backup """ - self.maxDiff = None fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', 'max_wal_senders': '2', - 'checkpoint_timeout': '30s', 'ptrack_enable': 'on', - 'autovacuum': 'off'} - ) + 'checkpoint_timeout': '30s', + 'autovacuum': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") self.create_tblspace_in_node(node, 'somedata') tblspc_path = self.get_tblspace_path(node, 'somedata') @@ -1364,14 +1637,15 @@ def test_ptrack_alter_tablespace(self): # Move table to separate tablespace node.safe_psql( - "postgres", "alter table t_heap set tablespace somedata") + "postgres", + "alter table t_heap set tablespace somedata") # GET LOGICAL CONTENT FROM NODE result = node.safe_psql("postgres", "select * from t_heap") # FIRTS PTRACK BACKUP self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"]) + options=["--stream"]) # GET PHYSICAL CONTENT FROM NODE if self.paranoia: @@ -1379,13 +1653,12 @@ def test_ptrack_alter_tablespace(self): # Restore ptrack backup restored_node = self.make_simple_node( - base_dir="{0}/{1}/restored_node".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'restored_node')) restored_node.cleanup() tblspc_path_new = self.get_tblspace_path( restored_node, 'somedata_restored') self.restore_node(backup_dir, 'node', restored_node, options=[ - "-j", "4", "-T", "{0}={1}".format(tblspc_path, tblspc_path_new), - "--recovery-target-action=promote"]) + "-j", "4", "-T", "{0}={1}".format(tblspc_path, tblspc_path_new)]) # GET PHYSICAL CONTENT FROM RESTORED NODE and COMPARE PHYSICAL CONTENT if self.paranoia: @@ -1394,8 +1667,8 @@ def test_ptrack_alter_tablespace(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - restored_node.append_conf( - "postgresql.auto.conf", "port = {0}".format(restored_node.port)) + self.set_auto_conf( + restored_node, {'port': restored_node.port}) restored_node.slow_start() # COMPARE LOGICAL CONTENT @@ -1412,15 +1685,16 @@ def 
test_ptrack_alter_tablespace(self): # SECOND PTRACK BACKUP self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"]) + options=["--stream"]) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) # Restore second ptrack backup and check table consistency - self.restore_node(backup_dir, 'node', restored_node, options=[ - "-j", "4", "-T", "{0}={1}".format(tblspc_path, tblspc_path_new), - "--recovery-target-action=promote"]) + self.restore_node( + backup_dir, 'node', restored_node, + options=[ + "-j", "4", "-T", "{0}={1}".format(tblspc_path, tblspc_path_new)]) # GET PHYSICAL CONTENT FROM RESTORED NODE and COMPARE PHYSICAL CONTENT if self.paranoia: @@ -1429,8 +1703,8 @@ def test_ptrack_alter_tablespace(self): self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - restored_node.append_conf( - "postgresql.auto.conf", "port = {0}".format(restored_node.port)) + self.set_auto_conf( + restored_node, {'port': restored_node.port}) restored_node.slow_start() result_new = restored_node.safe_psql( @@ -1449,97 +1723,106 @@ def test_ptrack_multiple_segments(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', 'max_wal_senders': '2', - 'ptrack_enable': 'on', 'fsync': 'off', 'autovacuum': 'off', - 'full_page_writes': 'off' - } - ) + 'full_page_writes': 'off'}) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") self.create_tblspace_in_node(node, 'somedata') # CREATE TABLE node.pgbench_init(scale=100, options=['--tablespace=somedata']) # FULL BACKUP - self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, options=['--stream']) # PTRACK STUFF - idx_ptrack = {'type': 'heap'} - idx_ptrack['path'] = self.get_fork_path(node, 'pgbench_accounts') - idx_ptrack['old_size'] = self.get_fork_size(node, 'pgbench_accounts') - idx_ptrack['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack['path'], idx_ptrack['old_size']) - - pgbench = node.pgbench(options=['-T', '150', '-c', '2', '--no-vacuum']) + if node.major_version < 12: + idx_ptrack = {'type': 'heap'} + idx_ptrack['path'] = self.get_fork_path(node, 'pgbench_accounts') + idx_ptrack['old_size'] = self.get_fork_size(node, 'pgbench_accounts') + idx_ptrack['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack['path'], idx_ptrack['old_size']) + + pgbench = node.pgbench( + options=['-T', '30', '-c', '1', '--no-vacuum']) pgbench.wait() + node.safe_psql("postgres", "checkpoint") - idx_ptrack['new_size'] = self.get_fork_size( - node, - 'pgbench_accounts' - ) - idx_ptrack['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack['path'], - idx_ptrack['new_size'] - ) - idx_ptrack['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, - idx_ptrack['path'] - ) - self.check_ptrack_sanity(idx_ptrack) + if node.major_version < 12: + idx_ptrack['new_size'] = self.get_fork_size( + node, + 'pgbench_accounts') - # GET LOGICAL CONTENT FROM NODE - result = node.safe_psql("postgres", "select * from pgbench_accounts") + idx_ptrack['new_pages'] = 
self.get_md5_per_page_for_fork( + idx_ptrack['path'], + idx_ptrack['new_size']) + + idx_ptrack['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + node, + idx_ptrack['path']) + + if not self.check_ptrack_sanity(idx_ptrack): + self.assertTrue( + False, 'Ptrack has failed to register changes in data files') + + # GET LOGICAL CONTENT FROM NODE + # NB: hint bits are ignored by ptrack, so compare logical content as well + result = node.safe_psql("postgres", "select * from pgbench_accounts") # FIRTS PTRACK BACKUP self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=["--log-level-file=verbose"] - ) + backup_dir, 'node', node, backup_type='ptrack', options=['--stream']) + # GET PHYSICAL CONTENT FROM NODE pgdata = self.pgdata_content(node.data_dir) # RESTORE NODE restored_node = self.make_simple_node( - base_dir="{0}/{1}/restored_node".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'restored_node')) restored_node.cleanup() tblspc_path = self.get_tblspace_path(node, 'somedata') tblspc_path_new = self.get_tblspace_path( restored_node, - 'somedata_restored' - ) + 'somedata_restored') - self.restore_node(backup_dir, 'node', restored_node, options=[ - "-j", "4", "-T", "{0}={1}".format(tblspc_path, tblspc_path_new), - "--recovery-target-action=promote"]) + self.restore_node( + backup_dir, 'node', restored_node, + options=[ + "-j", "4", "-T", "{0}={1}".format( + tblspc_path, tblspc_path_new)]) # GET PHYSICAL CONTENT FROM NODE_RESTORED if self.paranoia: pgdata_restored = self.pgdata_content( restored_node.data_dir, ignore_ptrack=False) - self.compare_pgdata(pgdata, pgdata_restored) # START RESTORED NODE - restored_node.append_conf( - "postgresql.auto.conf", "port = {0}".format(restored_node.port)) + self.set_auto_conf( + restored_node, {'port': restored_node.port}) restored_node.slow_start() result_new = restored_node.safe_psql( "postgres", - "select * from pgbench_accounts" - ) + "select * from pgbench_accounts") # COMPARE RESTORED FILES self.assertEqual(result, result_new, 'data is lost') + if self.paranoia: + self.compare_pgdata(pgdata, pgdata_restored) + # Clean after yourself self.del_test_dir(module_name, fname) @@ -1547,22 +1830,25 @@ def test_ptrack_multiple_segments(self): # @unittest.expectedFailure def test_atexit_fail(self): """ - Take backups of every available types and check that PTRACK is clean + Take backups of every available types and check that PTRACK is clean. 
+ Relevant only for PTRACK 1.x """ + if self.pg_config_version > self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL =< 11 for this test') + fname = self.id().split('.')[3] node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'node'), set_replication=True, + ptrack_enable=True, initdb_params=['--data-checksums'], pg_options={ - 'ptrack_enable': 'on', - 'wal_level': 'replica', - 'max_wal_senders': '2', 'max_connections': '15'}) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) - node.start() + node.slow_start() # Take FULL backup to clean every ptrack self.backup_node( @@ -1571,10 +1857,8 @@ def test_atexit_fail(self): try: self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=[ - "--stream", "-j 30", - "--log-level-file=verbose"] - ) + options=["--stream", "-j 30"]) + # we should die here because exception is what we expect to happen self.assertEqual( 1, 0, @@ -1598,3 +1882,2253 @@ def test_atexit_fail(self): # Clean after yourself self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_clean(self): + """ + Take backups of every available types and check that PTRACK is clean + Relevant only for PTRACK 1.x + """ + if self.pg_config_version > self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL =< 11 for this test') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.create_tblspace_in_node(node, 'somedata') + + # Create table and indexes + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap tablespace somedata " + "as select i as id, nextval('t_seq') as t_seq, " + "md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3}) " + "tablespace somedata".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], + idx_ptrack[i]['column'])) + + # Take FULL backup to clean every ptrack + self.backup_node( + backup_dir, 'node', node, + options=['-j10', '--stream']) + node.safe_psql('postgres', 'checkpoint') + + for i in idx_ptrack: + # get fork size and calculate it in pages + idx_ptrack[i]['size'] = self.get_fork_size(node, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + # get ptrack for every idx + idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + node, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) + self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size']) + + # Update everything and vacuum it + node.safe_psql( + 'postgres', + "update t_heap set t_seq = nextval('t_seq'), " + "text = md5(text), " + "tsvector = md5(repeat(tsvector::text, 10))::tsvector;") + node.safe_psql('postgres', 'vacuum t_heap') + + # Take PTRACK backup to clean every ptrack + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='ptrack', options=['-j10', 
'--stream'])
+
+        node.safe_psql('postgres', 'checkpoint')
+
+        for i in idx_ptrack:
+            # get new size of heap and indexes and calculate it in pages
+            idx_ptrack[i]['size'] = self.get_fork_size(node, i)
+            # update path to heap and index files in case they've changed
+            idx_ptrack[i]['path'] = self.get_fork_path(node, i)
+            # get ptrack for every idx
+            idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork(
+                node, idx_ptrack[i]['path'], [idx_ptrack[i]['size']])
+            # check that ptrack bits are cleaned
+            self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size'])
+
+        # Update everything and vacuum it
+        node.safe_psql(
+            'postgres',
+            "update t_heap set t_seq = nextval('t_seq'), "
+            "text = md5(text), "
+            "tsvector = md5(repeat(tsvector::text, 10))::tsvector;")
+        node.safe_psql('postgres', 'vacuum t_heap')
+
+        # Take PAGE backup to clean every ptrack
+        self.backup_node(
+            backup_dir, 'node', node,
+            backup_type='page', options=['-j10', '--stream'])
+        node.safe_psql('postgres', 'checkpoint')
+
+        for i in idx_ptrack:
+            # get new size of heap and indexes and calculate it in pages
+            idx_ptrack[i]['size'] = self.get_fork_size(node, i)
+            # update path to heap and index files in case they've changed
+            idx_ptrack[i]['path'] = self.get_fork_path(node, i)
+            # get ptrack for every idx
+            idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork(
+                node, idx_ptrack[i]['path'], [idx_ptrack[i]['size']])
+            # check that ptrack bits are cleaned
+            self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size'])
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)
+
+    # @unittest.skip("skip")
+    # @unittest.expectedFailure
+    def test_ptrack_clean_replica(self):
+        """
+        Take backups of every available type from
+        master and check that PTRACK on replica is clean.
+ Relevant only for PTRACK 1.x + """ + if self.pg_config_version > self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL =< 11 for this test') + + fname = self.id().split('.')[3] + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={ + 'archive_timeout': '30s'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + master.slow_start() + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'master', replica) + + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, synchronous=True) + replica.slow_start(replica=True) + + # Create table and indexes + master.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap as select i as id, " + "nextval('t_seq') as t_seq, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + master.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], + idx_ptrack[i]['column'])) + + # Take FULL backup to clean every ptrack + self.backup_node( + backup_dir, + 'replica', + replica, + options=[ + '-j10', '--stream', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port)]) + master.safe_psql('postgres', 'checkpoint') + + for i in idx_ptrack: + # get fork size and calculate it in pages + idx_ptrack[i]['size'] = self.get_fork_size(replica, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(replica, i) + # get ptrack for every idx + idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + replica, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) + self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size']) + + # Update everything and vacuum it + master.safe_psql( + 'postgres', + "update t_heap set t_seq = nextval('t_seq'), " + "text = md5(text), " + "tsvector = md5(repeat(tsvector::text, 10))::tsvector;") + master.safe_psql('postgres', 'vacuum t_heap') + + # Take PTRACK backup to clean every ptrack + backup_id = self.backup_node( + backup_dir, + 'replica', + replica, + backup_type='ptrack', + options=[ + '-j10', '--stream', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port)]) + master.safe_psql('postgres', 'checkpoint') + + for i in idx_ptrack: + # get new size of heap and indexes and calculate it in pages + idx_ptrack[i]['size'] = self.get_fork_size(replica, i) + # update path to heap and index files in case they`ve changed + idx_ptrack[i]['path'] = self.get_fork_path(replica, i) + # # get ptrack for every idx + idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + replica, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) + # check that ptrack bits are cleaned + self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size']) + + # Update everything and vacuum it + master.safe_psql( + 'postgres', + "update t_heap set t_seq = nextval('t_seq'), text = md5(text), " + "tsvector = md5(repeat(tsvector::text, 10))::tsvector;") 
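# --- editorial note (illustrative, not part of the patch) --------------------
# The repeated loops in this test rely on the suite's check_ptrack_clean()
# helper: after a completed backup, no page of any tracked relation fork
# should still be flagged in the PTRACK change map. A minimal, self-contained
# sketch of that invariant follows; the helper name and the per-page 0/1 list
# are assumptions made for illustration, not the framework's implementation.

def assert_ptrack_clean_sketch(ptrack_bits, size_in_pages):
    """ptrack_bits is assumed to hold one 0/1 change flag per page."""
    for page_no in range(size_in_pages):
        # every page of the fork must be unmarked once the backup is done
        assert ptrack_bits[page_no] == 0, (
            'page {0} is still marked as changed after backup'.format(page_no))

# For example, assert_ptrack_clean_sketch([0, 0, 0], 3) passes, while
# assert_ptrack_clean_sketch([0, 1, 0], 3) raises AssertionError.
# ------------------------------------------------------------------------------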
+ master.safe_psql('postgres', 'vacuum t_heap') + master.safe_psql('postgres', 'checkpoint') + + # Take PAGE backup to clean every ptrack + self.backup_node( + backup_dir, + 'replica', + replica, + backup_type='page', + options=[ + '-j10', '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port), + '--stream']) + master.safe_psql('postgres', 'checkpoint') + + for i in idx_ptrack: + # get new size of heap and indexes and calculate it in pages + idx_ptrack[i]['size'] = self.get_fork_size(replica, i) + # update path to heap and index files in case they`ve changed + idx_ptrack[i]['path'] = self.get_fork_path(replica, i) + # # get ptrack for every idx + idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + replica, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) + # check that ptrack bits are cleaned + self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_cluster_on_btree(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.create_tblspace_in_node(node, 'somedata') + + # Create table and indexes + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap tablespace somedata " + "as select i as id, nextval('t_seq') as t_seq, " + "md5(i::text) as text, md5(repeat(i::text,10))::tsvector " + "as tsvector from generate_series(0,2560) i") + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3}) " + "tablespace somedata".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + node.safe_psql('postgres', 'vacuum t_heap') + node.safe_psql('postgres', 'checkpoint') + + if node.major_version < 12: + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + self.backup_node( + backup_dir, 'node', node, options=['-j10', '--stream']) + + node.safe_psql('postgres', 'delete from t_heap where id%2 = 1') + node.safe_psql('postgres', 'cluster t_heap using t_btree') + node.safe_psql('postgres', 'checkpoint') + + # CHECK PTRACK SANITY + if node.major_version < 12: + self.check_ptrack_map_sanity(node, idx_ptrack) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_ptrack_cluster_on_gist(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + # Create table and indexes + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap as select i as id, " + "nextval('t_seq') as t_seq, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + node.safe_psql('postgres', 'vacuum t_heap') + node.safe_psql('postgres', 'checkpoint') + + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + self.backup_node( + backup_dir, 'node', node, options=['-j10', '--stream']) + + node.safe_psql('postgres', 'delete from t_heap where id%2 = 1') + node.safe_psql('postgres', 'cluster t_heap using t_gist') + node.safe_psql('postgres', 'checkpoint') + + # CHECK PTRACK SANITY + self.check_ptrack_map_sanity(node, idx_ptrack) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_ptrack_cluster_on_btree_replica(self): + fname = self.id().split('.')[3] + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + master.slow_start() + + if master.major_version >= 12: + master.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'master', replica) + + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, synchronous=True) + replica.slow_start(replica=True) + + # Create table and indexes + master.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap as select i as id, " + "nextval('t_seq') as t_seq, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + master.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], + idx_ptrack[i]['column'])) + + master.safe_psql('postgres', 'vacuum t_heap') + master.safe_psql('postgres', 'checkpoint') + + self.backup_node( + backup_dir, 'replica', replica, options=[ + '-j10', '--stream', '--master-host=localhost', + '--master-db=postgres', '--master-port={0}'.format( + master.port)]) + + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(replica, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + master.safe_psql('postgres', 'delete from t_heap where id%2 = 1') + master.safe_psql('postgres', 'cluster t_heap using t_btree') + master.safe_psql('postgres', 'checkpoint') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'checkpoint') + + # CHECK PTRACK SANITY + self.check_ptrack_map_sanity(replica, idx_ptrack) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_ptrack_cluster_on_gist_replica(self): + fname = self.id().split('.')[3] + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + ptrack_enable=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + master.slow_start() + + if master.major_version >= 12: + master.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'master', replica) + + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, 'replica', synchronous=True) + replica.slow_start(replica=True) + + # Create table and indexes + master.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap as select i as id, " + "nextval('t_seq') as t_seq, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + master.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], + idx_ptrack[i]['column'])) + + master.safe_psql('postgres', 'vacuum t_heap') + master.safe_psql('postgres', 'checkpoint') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'checkpoint') + + self.backup_node( + backup_dir, 'replica', replica, options=[ + '-j10', '--stream', '--master-host=localhost', + '--master-db=postgres', '--master-port={0}'.format( + master.port)]) + + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(replica, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + master.safe_psql('postgres', 'DELETE FROM t_heap WHERE id%2 = 1') + master.safe_psql('postgres', 'CLUSTER t_heap USING t_gist') + + if master.major_version < 12: + master.safe_psql('postgres', 'CHECKPOINT') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + + if master.major_version < 12: + replica.safe_psql('postgres', 'CHECKPOINT') + self.check_ptrack_map_sanity(replica, idx_ptrack) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='ptrack', options=['-j10', '--stream']) + + if self.paranoia: + pgdata = self.pgdata_content(replica.data_dir) + + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node')) + node.cleanup() + + self.restore_node(backup_dir, 'replica', node) + + if self.paranoia: + pgdata_restored = self.pgdata_content(replica.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_empty(self): + """Take backups of every available types and check that PTRACK is clean""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.create_tblspace_in_node(node, 'somedata') + + # Create table + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap " + "(id int DEFAULT nextval('t_seq'), text text, tsvector tsvector) " + "tablespace somedata") + + # Take FULL backup to clean every ptrack + self.backup_node( + backup_dir, 'node', node, + options=['-j10', '--stream']) + + # Create indexes + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3}) " + "tablespace somedata".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], + idx_ptrack[i]['column'])) + + node.safe_psql('postgres', 'checkpoint') + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + tblspace1 = self.get_tblspace_path(node, 'somedata') + tblspace2 = self.get_tblspace_path(node_restored, 'somedata') + + # Take PTRACK backup + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='ptrack', + options=['-j10', '--stream']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + self.restore_node( + backup_dir, 'node', node_restored, + backup_id=backup_id, + options=[ + "-j", "4", + "-T{0}={1}".format(tblspace1, tblspace2)]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") 
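# --- editorial note (illustrative, not part of the patch) --------------------
# test_ptrack_empty above and test_ptrack_empty_replica below follow the
# skeleton shared by most tests in this module. A condensed outline is shown
# here; it uses only helpers that already appear in this suite (backup_node,
# restore_node, pgdata_content, compare_pgdata) and is a sketch rather than a
# standalone test. It assumes node_restored is an empty, cleaned-up node.

def ptrack_roundtrip_outline(self, backup_dir, node, node_restored):
    # 1. FULL backup: establishes the baseline and resets the PTRACK map.
    self.backup_node(backup_dir, 'node', node, options=['--stream'])
    # 2. Change data (DDL/DML on the source node), then take an incremental
    #    PTRACK backup that should capture only the changed pages.
    backup_id = self.backup_node(
        backup_dir, 'node', node,
        backup_type='ptrack', options=['--stream'])
    # 3. Snapshot the source data directory, restore the PTRACK backup into
    #    the empty node, and compare the two directories page by page.
    pgdata = self.pgdata_content(node.data_dir)
    self.restore_node(
        backup_dir, 'node', node_restored, backup_id=backup_id)
    self.compare_pgdata(pgdata, self.pgdata_content(node_restored.data_dir))
# ------------------------------------------------------------------------------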
+ # @unittest.expectedFailure + def test_ptrack_empty_replica(self): + """ + Take backups of every available types from master + and check that PTRACK on replica is clean + """ + fname = self.id().split('.')[3] + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums'], + ptrack_enable=True, + pg_options={ + 'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + master.slow_start() + + if master.major_version >= 12: + master.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'master', replica) + + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, synchronous=True) + replica.slow_start(replica=True) + + # Create table + master.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap " + "(id int DEFAULT nextval('t_seq'), text text, tsvector tsvector)") + self.wait_until_replica_catch_with_master(master, replica) + + # Take FULL backup + self.backup_node( + backup_dir, + 'replica', + replica, + options=[ + '-j10', '--stream', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port)]) + + # Create indexes + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + master.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], + idx_ptrack[i]['column'])) + + self.wait_until_replica_catch_with_master(master, replica) + + # Take PTRACK backup + backup_id = self.backup_node( + backup_dir, + 'replica', + replica, + backup_type='ptrack', + options=[ + '-j1', '--stream', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port)]) + + if self.paranoia: + pgdata = self.pgdata_content(replica.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'replica', node_restored, + backup_id=backup_id, options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_truncate(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.create_tblspace_in_node(node, 'somedata') + + # Create table and indexes + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap tablespace somedata " + "as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from 
generate_series(0,2560) i") + + if node.major_version < 12: + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3}) " + "tablespace somedata".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + node.safe_psql('postgres', 'truncate t_heap') + node.safe_psql('postgres', 'checkpoint') + + if node.major_version < 12: + for i in idx_ptrack: + # get fork size and calculate it in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + # calculate md5sums for every page of this fork + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + # Make backup to clean every ptrack + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['-j10', '--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + if node.major_version < 12: + for i in idx_ptrack: + idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size']]) + self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['old_size']) + + node.cleanup() + shutil.rmtree( + self.get_tblspace_path(node, 'somedata'), + ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_ptrack_truncate_replica(self): + fname = self.id().split('.')[3] + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_size': '32MB', + 'archive_timeout': '10s', + 'checkpoint_timeout': '30s'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + master.slow_start() + + if master.major_version >= 12: + master.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'master', replica) + + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, 'replica', synchronous=True) + replica.slow_start(replica=True) + + # Create table and indexes + master.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap " + "as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + master.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3}) ".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'checkpoint') + + if replica.major_version < 12: + for i in idx_ptrack: + # get fork size and calculate it in pages + idx_ptrack[i]['old_size'] = 
self.get_fork_size(replica, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(replica, i) + # calculate md5sums for every page of this fork + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + # Make backup to clean every ptrack + self.backup_node( + backup_dir, 'replica', replica, + options=[ + '-j10', + '--stream', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port)]) + + if replica.major_version < 12: + for i in idx_ptrack: + idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size']]) + self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['old_size']) + + master.safe_psql('postgres', 'truncate t_heap') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'checkpoint') + + self.backup_node( + backup_dir, 'replica', replica, backup_type='ptrack', + options=[ + '-j10', + '--stream', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port)]) + + pgdata = self.pgdata_content(replica.data_dir) + + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node')) + node.cleanup() + + self.restore_node(backup_dir, 'replica', node, data_dir=node.data_dir) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_vacuum(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.create_tblspace_in_node(node, 'somedata') + + # Create table and indexes + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap tablespace somedata " + "as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3}) " + "tablespace somedata".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], + idx_ptrack[i]['column'])) + + node.safe_psql('postgres', 'vacuum t_heap') + node.safe_psql('postgres', 'checkpoint') + + # Make full backup to clean every ptrack + self.backup_node( + backup_dir, 'node', node, options=['-j10', '--stream']) + + if node.major_version < 12: + for i in idx_ptrack: + # get fork size and calculate it in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + # calculate md5sums for every page of this fork + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size']]) + 
self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['old_size']) + + # Delete some rows, vacuum it and make checkpoint + node.safe_psql('postgres', 'delete from t_heap where id%2 = 1') + node.safe_psql('postgres', 'vacuum t_heap') + node.safe_psql('postgres', 'checkpoint') + + # CHECK PTRACK SANITY + if node.major_version < 12: + self.check_ptrack_map_sanity(node, idx_ptrack) + + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['-j10', '--stream']) + + pgdata = self.pgdata_content(node.data_dir) + node.cleanup() + + shutil.rmtree( + self.get_tblspace_path(node, 'somedata'), + ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_ptrack_vacuum_replica(self): + fname = self.id().split('.')[3] + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '30'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + master.slow_start() + + if master.major_version >= 12: + master.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'master', replica) + + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, 'replica', synchronous=True) + replica.slow_start(replica=True) + + # Create table and indexes + master.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap as select i as id, " + "md5(i::text) as text, md5(repeat(i::text,10))::tsvector " + "as tsvector from generate_series(0,2560) i") + + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + master.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + master.safe_psql('postgres', 'vacuum t_heap') + master.safe_psql('postgres', 'checkpoint') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'checkpoint') + + # Make FULL backup to clean every ptrack + self.backup_node( + backup_dir, 'replica', replica, options=[ + '-j10', '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port), + '--stream']) + + if replica.major_version < 12: + for i in idx_ptrack: + # get fork size and calculate it in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(replica, i) + # calculate md5sums for every page of this fork + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size']]) + self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['old_size']) + + # Delete some rows, vacuum it and make checkpoint + master.safe_psql('postgres', 'delete 
from t_heap where id%2 = 1') + master.safe_psql('postgres', 'vacuum t_heap') + master.safe_psql('postgres', 'checkpoint') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'checkpoint') + + # CHECK PTRACK SANITY + if replica.major_version < 12: + self.check_ptrack_map_sanity(master, idx_ptrack) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='ptrack', options=['-j10', '--stream']) + + pgdata = self.pgdata_content(replica.data_dir) + + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node')) + node.cleanup() + + self.restore_node(backup_dir, 'replica', node, data_dir=node.data_dir) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_vacuum_bits_frozen(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.create_tblspace_in_node(node, 'somedata') + + # Create table and indexes + res = node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap tablespace somedata " + "as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3}) " + "tablespace somedata".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], + idx_ptrack[i]['column'])) + + node.safe_psql('postgres', 'checkpoint') + + self.backup_node( + backup_dir, 'node', node, options=['-j10', '--stream']) + + node.safe_psql('postgres', 'vacuum freeze t_heap') + node.safe_psql('postgres', 'checkpoint') + + if node.major_version < 12: + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + # CHECK PTRACK SANITY + if node.major_version < 12: + self.check_ptrack_map_sanity(node, idx_ptrack) + + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['-j10', '--stream']) + + pgdata = self.pgdata_content(node.data_dir) + node.cleanup() + shutil.rmtree( + self.get_tblspace_path(node, 'somedata'), + ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_ptrack_vacuum_bits_frozen_replica(self): + fname = self.id().split('.')[3] + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + master.slow_start() + + if master.major_version >= 12: + master.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'master', replica) + + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, synchronous=True) + replica.slow_start(replica=True) + + # Create table and indexes + master.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap as select i as id, " + "md5(i::text) as text, md5(repeat(i::text,10))::tsvector " + "as tsvector from generate_series(0,2560) i") + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + master.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], + idx_ptrack[i]['column'])) + + master.safe_psql('postgres', 'checkpoint') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'checkpoint') + + # Take backup to clean every ptrack + self.backup_node( + backup_dir, 'replica', replica, + options=[ + '-j10', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port), + '--stream']) + + if replica.major_version < 12: + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(replica, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + master.safe_psql('postgres', 'vacuum freeze t_heap') + master.safe_psql('postgres', 'checkpoint') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'checkpoint') + + # CHECK PTRACK SANITY + if replica.major_version < 12: + self.check_ptrack_map_sanity(master, idx_ptrack) + + self.backup_node( + backup_dir, 'replica', replica, backup_type='ptrack', + options=['-j10', '--stream']) + + pgdata = self.pgdata_content(replica.data_dir) + replica.cleanup() + + self.restore_node(backup_dir, 'replica', replica) + + pgdata_restored = self.pgdata_content(replica.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_vacuum_bits_visibility(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.create_tblspace_in_node(node, 'somedata') + + # Create table and indexes + res = node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap tablespace somedata " + "as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3}) " + "tablespace somedata".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + node.safe_psql('postgres', 'checkpoint') + + self.backup_node( + backup_dir, 'node', node, options=['-j10', '--stream']) + + if node.major_version < 12: + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + node.safe_psql('postgres', 'vacuum t_heap') + node.safe_psql('postgres', 'checkpoint') + + # CHECK PTRACK SANITY + if node.major_version < 12: + self.check_ptrack_map_sanity(node, idx_ptrack) + + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['-j10', '--stream']) + + pgdata = self.pgdata_content(node.data_dir) + node.cleanup() + shutil.rmtree( + self.get_tblspace_path(node, 'somedata'), + ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_vacuum_full(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.create_tblspace_in_node(node, 'somedata') + + # Create table and indexes + res = node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap tablespace somedata " + "as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", "create index {0} on {1} " + "using {2}({3}) tablespace somedata".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + node.safe_psql('postgres', 'vacuum t_heap') + node.safe_psql('postgres', 'checkpoint') + + self.backup_node( + backup_dir, 'node', node, options=['-j10', '--stream']) + + if node.major_version < 12: + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + node.safe_psql('postgres', 'delete from t_heap where id%2 = 1') + node.safe_psql('postgres', 'vacuum full t_heap') + node.safe_psql('postgres', 'checkpoint') + + if node.major_version < 12: + self.check_ptrack_map_sanity(node, idx_ptrack) + + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['-j10', '--stream']) + + pgdata = self.pgdata_content(node.data_dir) + node.cleanup() + + shutil.rmtree( + self.get_tblspace_path(node, 'somedata'), + ignore_errors=True) + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_vacuum_full_replica(self): + fname = self.id().split('.')[3] + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + master.slow_start() + + if master.major_version >= 12: + master.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'master', replica) + + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, 'replica', synchronous=True) + replica.slow_start(replica=True) + + # Create table and indexes + master.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap as select i as id, " + "md5(i::text) as text, md5(repeat(i::text,10))::tsvector as " + "tsvector from generate_series(0,256000) i") + + if master.major_version < 12: + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + master.safe_psql( + "postgres", + "create index {0} on {1} using {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], + idx_ptrack[i]['column'])) + + master.safe_psql('postgres', 'vacuum t_heap') + master.safe_psql('postgres', 'checkpoint') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'checkpoint') + + # Take FULL backup to clean every ptrack + self.backup_node( + backup_dir, 'replica', replica, + options=[ + '-j10', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port), + '--stream']) + + if replica.major_version < 12: + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(replica, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + master.safe_psql('postgres', 'delete from t_heap where id%2 = 1') + master.safe_psql('postgres', 'vacuum full t_heap') + master.safe_psql('postgres', 'checkpoint') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'checkpoint') + + if replica.major_version < 12: + self.check_ptrack_map_sanity(master, idx_ptrack) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='ptrack', options=['-j10', '--stream']) + + pgdata = self.pgdata_content(replica.data_dir) + replica.cleanup() + + self.restore_node(backup_dir, 'replica', replica) + + pgdata_restored = self.pgdata_content(replica.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_vacuum_truncate(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + # Create table and indexes + res = node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap " + "as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + + if node.major_version < 12: + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", "create index {0} on {1} using {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + node.safe_psql('postgres', 'VACUUM t_heap') + + self.backup_node( + backup_dir, 'node', node, options=['-j10', '--stream']) + + if node.major_version < 12: + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + node.safe_psql('postgres', 'DELETE FROM t_heap WHERE id > 128') + node.safe_psql('postgres', 'VACUUM t_heap') + node.safe_psql('postgres', 'CHECKPOINT') + + # CHECK PTRACK SANITY + if node.major_version < 12: + self.check_ptrack_map_sanity(node, idx_ptrack) + + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node(backup_dir, 'node', node_restored) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_vacuum_truncate_replica(self): + fname = self.id().split('.')[3] + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + master.slow_start() + + if master.major_version >= 12: + master.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.restore_node(backup_dir, 'master', replica) + + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, 'replica', synchronous=True) + replica.slow_start(replica=True) + + # Create table and indexes + master.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap as select i as id, " + "md5(i::text) as text, md5(repeat(i::text,10))::tsvector " + "as tsvector from generate_series(0,2560) i") + + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + master.safe_psql( + "postgres", "create index {0} on {1} " + "using {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + master.safe_psql('postgres', 'vacuum t_heap') + master.safe_psql('postgres', 'checkpoint') + + # Take FULL backup to clean every ptrack + self.backup_node( + backup_dir, 'replica', replica, + options=[ + '-j10', + '--stream', + '--master-host=localhost', + '--master-db=postgres', + '--master-port={0}'.format(master.port) + ] + ) + + if master.major_version < 12: + for i in idx_ptrack: + # get size of heap and indexes. 
size calculated in pages + idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(replica, i) + # calculate md5sums of pages + idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( + idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) + + master.safe_psql('postgres', 'DELETE FROM t_heap WHERE id > 128;') + master.safe_psql('postgres', 'VACUUM t_heap') + master.safe_psql('postgres', 'CHECKPOINT') + + # Sync master and replica + self.wait_until_replica_catch_with_master(master, replica) + replica.safe_psql('postgres', 'CHECKPOINT') + + # CHECK PTRACK SANITY + if master.major_version < 12: + self.check_ptrack_map_sanity(master, idx_ptrack) + + self.backup_node( + backup_dir, 'replica', replica, backup_type='ptrack', + options=[ + '--stream', + '--log-level-file=INFO', + '--archive-timeout=30']) + + pgdata = self.pgdata_content(replica.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node(backup_dir, 'replica', node_restored) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_recovery(self): + if self.pg_config_version > self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL =< 11 for this test') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.create_tblspace_in_node(node, 'somedata') + + # Create table + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap tablespace somedata " + "as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + + # Create indexes + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", "create index {0} on {1} using {2}({3}) " + "tablespace somedata".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + # get size of heap and indexes. size calculated in pages + idx_ptrack[i]['size'] = int(self.get_fork_size(node, i)) + # get path to heap and index files + idx_ptrack[i]['path'] = self.get_fork_path(node, i) + + if self.verbose: + print('Killing postmaster. Losing Ptrack changes') + node.stop(['-m', 'immediate', '-D', node.data_dir]) + if not node.status(): + node.slow_start() + else: + print("Die! Die! Why won't you die?... 
Why won't you die?") + exit(1) + + for i in idx_ptrack: + # get ptrack for every idx + idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( + node, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) + # check that ptrack has correct bits after recovery + self.check_ptrack_recovery(idx_ptrack[i]) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_recovery_1(self): + if self.pg_config_version < self.version_to_num('12.0'): + return unittest.skip('You need PostgreSQL >= 12 for this test') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'shared_buffers': '512MB', + 'max_wal_size': '3GB'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + # Create table + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap " + "as select nextval('t_seq')::int as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " +# "from generate_series(0,25600) i") + "from generate_series(0,2560) i") + + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # Create indexes + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", + "CREATE INDEX {0} ON {1} USING {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + node.safe_psql( + 'postgres', + "update t_heap set id = nextval('t_seq'), text = md5(text), " + "tsvector = md5(repeat(tsvector::text, 10))::tsvector") + + node.safe_psql( + 'postgres', + "create extension pg_buffercache") + + print(node.safe_psql( + 'postgres', + "SELECT count(*) FROM pg_buffercache WHERE isdirty")) + + if self.verbose: + print('Killing postmaster. Losing Ptrack changes') + node.stop(['-m', 'immediate', '-D', node.data_dir]) + + if not node.status(): + node.slow_start() + else: + print("Die! Die! Why won't you die?... 
Why won't you die?") + exit(1) + + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_zero_changes(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + # Create table + node.safe_psql( + "postgres", + "create table t_heap " + "as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_ptrack_pg_resetxlog(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'shared_buffers': '512MB', + 'max_wal_size': '3GB'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + # Create table + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap " + "as select nextval('t_seq')::int as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " +# "from generate_series(0,25600) i") + "from generate_series(0,2560) i") + + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # Create indexes + for i in idx_ptrack: + if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': + node.safe_psql( + "postgres", + "CREATE INDEX {0} ON {1} USING {2}({3})".format( + i, idx_ptrack[i]['relation'], + idx_ptrack[i]['type'], idx_ptrack[i]['column'])) + + node.safe_psql( + 'postgres', + "update t_heap set id = nextval('t_seq'), text = md5(text), " + "tsvector = md5(repeat(tsvector::text, 10))::tsvector") + +# node.safe_psql( +# 'postgres', +# "create extension pg_buffercache") +# +# print(node.safe_psql( +# 'postgres', +# "SELECT count(*) FROM pg_buffercache WHERE isdirty")) + + # kill the bastard + if self.verbose: + print('Killing postmaster. 
Losing Ptrack changes') + node.stop(['-m', 'immediate', '-D', node.data_dir]) + + # now smack it with sledgehammer + if node.major_version >= 10: + pg_resetxlog_path = self.get_bin_path('pg_resetwal') + wal_dir = 'pg_wal' + else: + pg_resetxlog_path = self.get_bin_path('pg_resetxlog') + wal_dir = 'pg_xlog' + + self.run_binary( + [ + pg_resetxlog_path, + '-D', + node.data_dir, + '-o 42', + '-f' + ], + asynchronous=False) + + if not node.status(): + node.slow_start() + else: + print("Die! Die! Why won't you die?... Why won't you die?") + exit(1) + + # take ptrack backup +# self.backup_node( +# backup_dir, 'node', node, +# backup_type='ptrack', options=['--stream']) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['--stream']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because instance was brutalized by pg_resetxlog" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd) + ) + except ProbackupException as e: + self.assertTrue( + 'ERROR: LSN from ptrack_control ' in e.message and + 'differs from Start LSN of previous backup' in e.message, + '\n Unexpected Error Message: {0}\n' + ' CMD: {1}'.format(repr(e.message), self.cmd)) + +# pgdata = self.pgdata_content(node.data_dir) +# +# node_restored = self.make_simple_node( +# base_dir=os.path.join(module_name, fname, 'node_restored')) +# node_restored.cleanup() +# +# self.restore_node( +# backup_dir, 'node', node_restored) +# +# pgdata_restored = self.pgdata_content(node_restored.data_dir) +# self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_corrupt_ptrack_map(self): + + if self.pg_config_version < self.version_to_num('12.0'): + return unittest.skip('You need PostgreSQL >= 12 for this test') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + # Create table + node.safe_psql( + "postgres", + "create extension bloom; create sequence t_seq; " + "create table t_heap " + "as select nextval('t_seq')::int as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,2560) i") + + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + node.safe_psql( + 'postgres', + "update t_heap set id = nextval('t_seq'), text = md5(text), " + "tsvector = md5(repeat(tsvector::text, 10))::tsvector") + + # kill the bastard + if self.verbose: + print('Killing postmaster. Losing Ptrack changes') + + node.stop(['-m', 'immediate', '-D', node.data_dir]) + + ptrack_map = os.path.join(node.data_dir, 'global', 'ptrack.map') + ptrack_map_mmap = os.path.join(node.data_dir, 'global', 'ptrack.map.mmap') + + # Let`s do index corruption. 
ptrack.map, ptrack.map.mmap + with open(ptrack_map, "rb+", 0) as f: + f.seek(42) + f.write(b"blablahblahs") + f.flush() + f.close + + with open(ptrack_map_mmap, "rb+", 0) as f: + f.seek(42) + f.write(b"blablahblahs") + f.flush() + f.close + +# os.remove(os.path.join(node.logs_dir, node.pg_log_name)) + + try: + node.slow_start() + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because ptrack.map is corrupted" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except StartNodeException as e: + self.assertIn( + 'Cannot start node', + e.message, + '\n Unexpected Error Message: {0}\n' + ' CMD: {1}'.format(repr(e.message), self.cmd)) + + log_file = os.path.join(node.logs_dir, 'postgresql.log') + with open(log_file, 'r') as f: + log_content = f.read() + + self.assertIn( + 'FATAL: ptrack init: incorrect checksum of file "{0}"'.format(ptrack_map), + log_content) + + self.set_auto_conf(node, {'ptrack.map_size': '0'}) + + node.slow_start() + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['--stream']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because instance ptrack is disabled" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Ptrack is disabled', + e.message, + '\n Unexpected Error Message: {0}\n' + ' CMD: {1}'.format(repr(e.message), self.cmd)) + + node.safe_psql( + 'postgres', + "update t_heap set id = nextval('t_seq'), text = md5(text), " + "tsvector = md5(repeat(tsvector::text, 10))::tsvector") + + node.stop(['-m', 'immediate', '-D', node.data_dir]) + + self.set_auto_conf(node, {'ptrack.map_size': '32', 'shared_preload_libraries': 'ptrack'}) + + node.slow_start() + + sleep(1) + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['--stream']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because ptrack map is from future" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: LSN from ptrack_control', + e.message, + '\n Unexpected Error Message: {0}\n' + ' CMD: {1}'.format(repr(e.message), self.cmd)) + + sleep(1) + + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--stream']) + + node.safe_psql( + 'postgres', + "update t_heap set id = nextval('t_seq'), text = md5(text), " + "tsvector = md5(repeat(tsvector::text, 10))::tsvector") + + self.backup_node( + backup_dir, 'node', node, + backup_type='ptrack', options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/ptrack_clean.py b/tests/ptrack_clean.py deleted file mode 100644 index f4350af04..000000000 --- a/tests/ptrack_clean.py +++ /dev/null @@ -1,253 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, idx_ptrack -import time - - -module_name = 'ptrack_clean' - - -class SimpleTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_clean(self): - """Take backups of every available types and check 
that PTRACK is clean""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'ptrack_enable': 'on', - 'wal_level': 'replica', - 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node(node, 'somedata') - - # Create table and indexes - node.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap tablespace somedata " - "as select i as id, nextval('t_seq') as t_seq, " - "md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql( - "postgres", - "create index {0} on {1} using {2}({3}) " - "tablespace somedata".format( - i, idx_ptrack[i]['relation'], - idx_ptrack[i]['type'], - idx_ptrack[i]['column'])) - - # Take FULL backup to clean every ptrack - self.backup_node( - backup_dir, 'node', node, - options=['-j10', '--stream']) - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get fork size and calculate it in pages - idx_ptrack[i]['size'] = self.get_fork_size(node, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) - self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size']) - - # Update everything and vacuum it - node.safe_psql( - 'postgres', - "update t_heap set t_seq = nextval('t_seq'), " - "text = md5(text), " - "tsvector = md5(repeat(tsvector::text, 10))::tsvector;") - node.safe_psql('postgres', 'vacuum t_heap') - - # Take PTRACK backup to clean every ptrack - backup_id = self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['-j10', '--log-level-file=verbose']) - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes and calculate it in pages - idx_ptrack[i]['size'] = self.get_fork_size(node, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) - # check that ptrack bits are cleaned - self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size']) - - # Update everything and vacuum it - node.safe_psql( - 'postgres', - "update t_heap set t_seq = nextval('t_seq'), " - "text = md5(text), " - "tsvector = md5(repeat(tsvector::text, 10))::tsvector;") - node.safe_psql('postgres', 'vacuum t_heap') - - # Take PAGE backup to clean every ptrack - self.backup_node( - backup_dir, 'node', node, - backup_type='page', options=['-j10']) - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes and calculate it in pages - idx_ptrack[i]['size'] = self.get_fork_size(node, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], 
[idx_ptrack[i]['size']]) - # check that ptrack bits are cleaned - self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size']) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_clean_replica(self): - """Take backups of every available types from master and check that PTRACK on replica is clean""" - fname = self.id().split('.')[3] - master = self.make_simple_node( - base_dir="{0}/{1}/master".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'ptrack_enable': 'on', - 'wal_level': 'replica', - 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'master', master) - master.start() - - self.backup_node(backup_dir, 'master', master, options=['--stream']) - - replica = self.make_simple_node( - base_dir="{0}/{1}/replica".format(module_name, fname)) - replica.cleanup() - - self.restore_node(backup_dir, 'master', replica) - - self.add_instance(backup_dir, 'replica', replica) - self.set_replica(master, replica, synchronous=True) - self.set_archiving(backup_dir, 'replica', replica, replica=True) - replica.start() - - # Create table and indexes - master.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap as select i as id, " - "nextval('t_seq') as t_seq, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - master.safe_psql( - "postgres", - "create index {0} on {1} using {2}({3})".format( - i, idx_ptrack[i]['relation'], - idx_ptrack[i]['type'], - idx_ptrack[i]['column'])) - - # Take FULL backup to clean every ptrack - self.backup_node( - backup_dir, - 'replica', - replica, - options=[ - '-j10', '--stream', - '--master-host=localhost', - '--master-db=postgres', - '--master-port={0}'.format(master.port)]) - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get fork size and calculate it in pages - idx_ptrack[i]['size'] = self.get_fork_size(replica, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) - self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size']) - - # Update everything and vacuum it - master.safe_psql( - 'postgres', - "update t_heap set t_seq = nextval('t_seq'), " - "text = md5(text), " - "tsvector = md5(repeat(tsvector::text, 10))::tsvector;") - master.safe_psql('postgres', 'vacuum t_heap') - - # Take PTRACK backup to clean every ptrack - backup_id = self.backup_node( - backup_dir, - 'replica', - replica, - backup_type='ptrack', - options=[ - '-j10', '--stream', - '--master-host=localhost', - '--master-db=postgres', - '--master-port={0}'.format(master.port)]) - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes and calculate it in pages - idx_ptrack[i]['size'] = self.get_fork_size(replica, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) - # check that ptrack bits 
are cleaned - self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size']) - - # Update everything and vacuum it - master.safe_psql( - 'postgres', - "update t_heap set t_seq = nextval('t_seq'), text = md5(text), " - "tsvector = md5(repeat(tsvector::text, 10))::tsvector;") - master.safe_psql('postgres', 'vacuum t_heap') - master.safe_psql('postgres', 'checkpoint') - - # Take PAGE backup to clean every ptrack - self.backup_node( - backup_dir, - 'replica', - replica, - backup_type='page', - options=[ - '-j10', '--master-host=localhost', - '--master-db=postgres', - '--master-port={0}'.format(master.port)]) - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes and calculate it in pages - idx_ptrack[i]['size'] = self.get_fork_size(replica, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) - # check that ptrack bits are cleaned - self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['size']) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/ptrack_cluster.py b/tests/ptrack_cluster.py deleted file mode 100644 index 784751ef6..000000000 --- a/tests/ptrack_cluster.py +++ /dev/null @@ -1,268 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, idx_ptrack -from time import sleep -from sys import exit - - -module_name = 'ptrack_cluster' - - -class SimpleTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_cluster_on_btree(self): - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node(node, 'somedata') - - # Create table and indexes - node.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap tablespace somedata as select i as id, nextval('t_seq') as t_seq, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql("postgres", "create index {0} on {1} using {2}({3}) tablespace somedata".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - node.safe_psql('postgres', 'vacuum t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - self.backup_node(backup_dir, 'node', node, options=['-j10', '--stream']) - - node.safe_psql('postgres', 'delete from t_heap where id%2 = 1') - node.safe_psql('postgres', 'cluster t_heap using t_btree') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(node, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_ptrack_cluster_on_gist(self): - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - # Create table and indexes - node.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap as select i as id, nextval('t_seq') as t_seq, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql("postgres", "create index {0} on {1} using {2}({3})".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - node.safe_psql('postgres', 'vacuum t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and indexes. size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - self.backup_node(backup_dir, 'node', node, options=['-j10', '--stream']) - - node.safe_psql('postgres', 'delete from t_heap where id%2 = 1') - node.safe_psql('postgres', 'cluster t_heap using t_gist') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(node, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # Compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_ptrack_cluster_on_btree_replica(self): - fname = self.id().split('.')[3] - master = self.make_simple_node(base_dir="{0}/{1}/master".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'master', master) - master.start() - - self.backup_node(backup_dir, 'master', master, options=['--stream']) - - replica = self.make_simple_node(base_dir="{0}/{1}/replica".format(module_name, fname)) - replica.cleanup() - - self.restore_node(backup_dir, 'master', replica) - - self.add_instance(backup_dir, 'replica', replica) - self.set_replica(master, replica, synchronous=True) - self.set_archiving(backup_dir, 'replica', replica, replica=True) - replica.start() - - # Create table and indexes - master.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap as select i as id, nextval('t_seq') as t_seq, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - master.safe_psql("postgres", "create index {0} on {1} using {2}({3})".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - master.safe_psql('postgres', 'vacuum t_heap') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and indexes. size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - self.backup_node(backup_dir, 'replica', replica, options=['-j10', '--stream', - '--master-host=localhost', '--master-db=postgres', '--master-port={0}'.format(master.port)]) - - master.safe_psql('postgres', 'delete from t_heap where id%2 = 1') - master.safe_psql('postgres', 'cluster t_heap using t_btree') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(replica, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - #@unittest.skip("skip") - def test_ptrack_cluster_on_gist_replica(self): - fname = self.id().split('.')[3] - master = self.make_simple_node(base_dir="{0}/{1}/master".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'master', master) - master.start() - - self.backup_node(backup_dir, 'master', master, options=['--stream']) - - replica = self.make_simple_node(base_dir="{0}/{1}/replica".format(module_name, fname)) - replica.cleanup() - - self.restore_node(backup_dir, 'master', replica) - - self.add_instance(backup_dir, 'replica', replica) - self.set_replica(master, replica, 'replica', synchronous=True) - self.set_archiving(backup_dir, 'replica', replica, replica=True) - replica.start() - - # Create table and indexes - master.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap as select i as id, nextval('t_seq') as t_seq, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - master.safe_psql("postgres", "create index {0} on {1} using {2}({3})".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - master.safe_psql('postgres', 'vacuum t_heap') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and indexes. size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - self.backup_node(backup_dir, 'replica', replica, options=['-j10', '--stream', - '--master-host=localhost', '--master-db=postgres', '--master-port={0}'.format(master.port)]) - - master.safe_psql('postgres', 'delete from t_heap where id%2 = 1') - master.safe_psql('postgres', 'cluster t_heap using t_gist') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(replica, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # Compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/ptrack_move_to_tablespace.py b/tests/ptrack_move_to_tablespace.py deleted file mode 100644 index 98c209142..000000000 --- a/tests/ptrack_move_to_tablespace.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, idx_ptrack - - -module_name = 'ptrack_move_to_tablespace' - - -class SimpleTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_recovery(self): - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node(node, 'somedata') - - # Create table and indexes - node.safe_psql("postgres", - "create sequence t_seq; create table t_heap as select i as id, md5(i::text) as text,md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql("postgres", "create index {0} on {1} using {2}({3})".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - # Move table and indexes and make checkpoint - for i in idx_ptrack: - if idx_ptrack[i]['type'] == 'heap': - node.safe_psql('postgres', 'alter table {0} set tablespace somedata;'.format(i)) - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql('postgres', 'alter index {0} set tablespace somedata'.format(i)) - node.safe_psql('postgres', 'checkpoint') - - # Check ptrack files - for i in idx_ptrack: - if idx_ptrack[i]['type'] == 'seq': - continue - # get size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['size'] = self.get_fork_size(node, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) - # check that ptrack has correct bits after recovery - self.check_ptrack_recovery(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/ptrack_recovery.py b/tests/ptrack_recovery.py deleted file mode 100644 index 8569ef592..000000000 --- a/tests/ptrack_recovery.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -import unittest -from sys import exit -from .helpers.ptrack_helpers import ProbackupTest, idx_ptrack - - -module_name = 'ptrack_recovery' - - -class SimpleTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_recovery(self): - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node(node, 'somedata') - - # Create table - node.safe_psql("postgres", - "create sequence t_seq; create table t_heap tablespace somedata as select i as id, md5(i::text) as text,md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - # Create indexes - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql("postgres", "create index {0} on {1} using {2}({3}) tablespace somedata".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - # get size of heap and indexes. size calculated in pages - idx_ptrack[i]['size'] = int(self.get_fork_size(node, i)) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - - if self.verbose: - print('Killing postmaster. Losing Ptrack changes') - node.stop(['-m', 'immediate', '-D', node.data_dir]) - if not node.status(): - node.start() - else: - print("Die! Die! Why won't you die?... 
Why won't you die?") - exit(1) - - for i in idx_ptrack: - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['size']]) - # check that ptrack has correct bits after recovery - self.check_ptrack_recovery(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/ptrack_truncate.py b/tests/ptrack_truncate.py deleted file mode 100644 index 928608c4a..000000000 --- a/tests/ptrack_truncate.py +++ /dev/null @@ -1,130 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, idx_ptrack - - -module_name = 'ptrack_truncate' - - -class SimpleTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_truncate(self): - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node(node, 'somedata') - - # Create table and indexes - node.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap tablespace somedata as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql("postgres", "create index {0} on {1} using {2}({3}) tablespace somedata".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - node.safe_psql('postgres', 'truncate t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get fork size and calculate it in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate md5sums for every page of this fork - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - # Make full backup to clean every ptrack - self.backup_node(backup_dir, 'node', node, options=['-j10', '--stream']) - for i in idx_ptrack: - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size']]) - self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['old_size']) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_ptrack_truncate_replica(self): - fname = self.id().split('.')[3] - master = self.make_simple_node(base_dir="{0}/{1}/master".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'master', master) - master.start() - - self.backup_node(backup_dir, 'master', master, options=['--stream']) - - replica = self.make_simple_node(base_dir="{0}/{1}/replica".format(module_name, fname)) - replica.cleanup() - - self.restore_node(backup_dir, 'master', replica) - - self.add_instance(backup_dir, 'replica', replica) - self.set_replica(master, replica, 
'replica', synchronous=True) - self.set_archiving(backup_dir, 'replica', replica, replica=True) - replica.start() - - # Create table and indexes - master.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap tablespace somedata as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - master.safe_psql("postgres", "create index {0} on {1} using {2}({3}) tablespace somedata".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - replica.safe_psql('postgres', 'truncate t_heap') - replica.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get fork size and calculate it in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate md5sums for every page of this fork - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - # Make full backup to clean every ptrack - self.backup_node(backup_dir, 'replica', replica, options=['-j10', '--stream']) - for i in idx_ptrack: - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size']]) - self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['old_size']) - - # Delete some rows, vacuum it and make checkpoint - master.safe_psql('postgres', 'delete from t_heap where id%2 = 1') - master.safe_psql('postgres', 'vacuum t_heap') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes and calculate it in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(replica, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/ptrack_vacuum.py b/tests/ptrack_vacuum.py deleted file mode 100644 index 0409cae3f..000000000 --- a/tests/ptrack_vacuum.py +++ /dev/null @@ -1,152 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, idx_ptrack - - -module_name = 'ptrack_vacuum' - - -class SimpleTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_vacuum(self): - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node(node, 'somedata') - - # Create table and indexes - node.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap tablespace somedata as select i as id, md5(i::text) as text, 
md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql("postgres", "create index {0} on {1} using {2}({3}) tablespace somedata".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - node.safe_psql('postgres', 'vacuum t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get fork size and calculate it in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate md5sums for every page of this fork - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - # Make full backup to clean every ptrack - self.backup_node(backup_dir, 'node', node, options=['-j10', '--stream']) - for i in idx_ptrack: - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size']]) - self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['old_size']) - - # Delete some rows, vacuum it and make checkpoint - node.safe_psql('postgres', 'delete from t_heap where id%2 = 1') - node.safe_psql('postgres', 'vacuum t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes and calculate it in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(node, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_ptrack_vacuum_replica(self): - fname = self.id().split('.')[3] - master = self.make_simple_node(base_dir="{0}/{1}/master".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'master', master) - master.start() - - self.backup_node(backup_dir, 'master', master, options=['--stream']) - - replica = self.make_simple_node(base_dir="{0}/{1}/replica".format(module_name, fname)) - replica.cleanup() - - self.restore_node(backup_dir, 'master', replica) - - self.add_instance(backup_dir, 'replica', replica) - self.set_replica(master, replica, 'replica', synchronous=True) - self.set_archiving(backup_dir, 'replica', replica, replica=True) - replica.start() - - # Create table and indexes - master.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - master.safe_psql("postgres", "create index {0} on {1} using {2}({3})".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], 
idx_ptrack[i]['column'])) - - master.safe_psql('postgres', 'vacuum t_heap') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get fork size and calculate it in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate md5sums for every page of this fork - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - # Make FULL backup to clean every ptrack - self.backup_node(backup_dir, 'replica', replica, options=['-j10', - '--master-host=localhost', '--master-db=postgres', '--master-port={0}'.format(master.port)]) - for i in idx_ptrack: - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size']]) - self.check_ptrack_clean(idx_ptrack[i], idx_ptrack[i]['old_size']) - - # Delete some rows, vacuum it and make checkpoint - master.safe_psql('postgres', 'delete from t_heap where id%2 = 1') - master.safe_psql('postgres', 'vacuum t_heap') - master.safe_psql('postgres', 'checkpoint') - - # CHECK PTRACK SANITY - for i in idx_ptrack: - # get new size of heap and indexes and calculate it in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(replica, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/ptrack_vacuum_bits_frozen.py b/tests/ptrack_vacuum_bits_frozen.py deleted file mode 100644 index f0cd3bbda..000000000 --- a/tests/ptrack_vacuum_bits_frozen.py +++ /dev/null @@ -1,136 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, idx_ptrack - - -module_name = 'ptrack_vacuum_bits_frozen' - - -class SimpleTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_vacuum_bits_frozen(self): - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node(node, 'somedata') - - # Create table and indexes - res = node.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap tablespace somedata as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql("postgres", "create index {0} on {1} using {2}({3}) tablespace somedata".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and 
indexes. size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - self.backup_node(backup_dir, 'node', node, options=['-j10', '--stream']) - - node.safe_psql('postgres', 'vacuum freeze t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(node, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_ptrack_vacuum_bits_frozen_replica(self): - fname = self.id().split('.')[3] - master = self.make_simple_node(base_dir="{0}/{1}/master".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'master', master) - master.start() - - self.backup_node(backup_dir, 'master', master, options=['--stream']) - - replica = self.make_simple_node(base_dir="{0}/{1}/replica".format(module_name, fname)) - replica.cleanup() - - self.restore_node(backup_dir, 'master', replica) - - self.add_instance(backup_dir, 'replica', replica) - self.set_replica(master, replica, synchronous=True) - self.set_archiving(backup_dir, 'replica', replica, replica=True) - replica.start() - - # Create table and indexes - master.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - master.safe_psql("postgres", "create index {0} on {1} using {2}({3})".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and indexes. size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - # Take PTRACK backup to clean every ptrack - self.backup_node(backup_dir, 'replica', replica, options=['-j10', - '--master-host=localhost', '--master-db=postgres', '--master-port={0}'.format(master.port)]) - - master.safe_psql('postgres', 'vacuum freeze t_heap') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(replica, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/ptrack_vacuum_bits_visibility.py b/tests/ptrack_vacuum_bits_visibility.py deleted file mode 100644 index 45a8d9b60..000000000 --- a/tests/ptrack_vacuum_bits_visibility.py +++ /dev/null @@ -1,67 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, idx_ptrack - - -module_name = 'ptrack_vacuum_bits_visibility' - - -class SimpleTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_vacuum_bits_visibility(self): - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node(node, 'somedata') - - # Create table and indexes - res = node.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap tablespace somedata as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql("postgres", "create index {0} on {1} using {2}({3}) tablespace somedata".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and indexes. size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - self.backup_node(backup_dir, 'node', node, options=['-j10', '--stream']) - - node.safe_psql('postgres', 'vacuum t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(node, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/ptrack_vacuum_full.py b/tests/ptrack_vacuum_full.py deleted file mode 100644 index ec12c9e27..000000000 --- a/tests/ptrack_vacuum_full.py +++ /dev/null @@ -1,140 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, idx_ptrack - - -module_name = 'ptrack_vacuum_full' - - -class SimpleTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_vacuum_full(self): - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node(node, 'somedata') - - # Create table and indexes - res = node.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap tablespace somedata as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,127) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql("postgres", "create index {0} on {1} using {2}({3}) tablespace somedata".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - node.safe_psql('postgres', 'vacuum t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and indexes. size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - self.backup_node(backup_dir, 'node', node, options=['-j10', '--stream']) - - node.safe_psql('postgres', 'delete from t_heap where id%2 = 1') - node.safe_psql('postgres', 'vacuum full t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(node, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity, the most important part - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_vacuum_full_replica(self): - fname = self.id().split('.')[3] - master = self.make_simple_node(base_dir="{0}/{1}/master".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'master', master) - master.start() - - self.backup_node(backup_dir, 'master', master, options=['--stream']) - replica = self.make_simple_node(base_dir="{0}/{1}/replica".format(module_name, fname)) - replica.cleanup() - - self.restore_node(backup_dir, 'master', replica) - - self.add_instance(backup_dir, 'replica', replica) - self.set_replica(master, replica, 'replica', synchronous=True) - self.set_archiving(backup_dir, 'replica', replica, replica=True) - replica.start() - - # Create table and indexes - master.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,127) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - master.safe_psql("postgres", "create index {0} on {1} using {2}({3})".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - master.safe_psql('postgres', 'vacuum t_heap') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and indexes. size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - # Take FULL backup to clean every ptrack - self.backup_node(backup_dir, 'replica', replica, options=['-j10', - '--master-host=localhost', '--master-db=postgres', '--master-port={0}'.format(master.port)]) - - master.safe_psql('postgres', 'delete from t_heap where id%2 = 1') - master.safe_psql('postgres', 'vacuum full t_heap') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(replica, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity, the most important part - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/ptrack_vacuum_truncate.py b/tests/ptrack_vacuum_truncate.py deleted file mode 100644 index 5c84c7e8f..000000000 --- a/tests/ptrack_vacuum_truncate.py +++ /dev/null @@ -1,142 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, idx_ptrack - - -module_name = 'ptrack_vacuum_truncate' - - -class SimpleTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_vacuum_truncate(self): - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - self.create_tblspace_in_node(node, 'somedata') - - # Create table and indexes - res = node.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap tablespace somedata as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - node.safe_psql("postgres", "create index {0} on {1} using {2}({3}) tablespace somedata".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - node.safe_psql('postgres', 'vacuum t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and indexes. size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(node, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - self.backup_node(backup_dir, 'node', node, options=['-j10', '--stream']) - - node.safe_psql('postgres', 'delete from t_heap where id > 128;') - node.safe_psql('postgres', 'vacuum t_heap') - node.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(node, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(node, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - node, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_ptrack_vacuum_truncate_replica(self): - fname = self.id().split('.')[3] - master = self.make_simple_node(base_dir="{0}/{1}/master".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'ptrack_enable': 'on', 'wal_level': 'replica', 'max_wal_senders': '2'}) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'master', master) - master.start() - - self.backup_node(backup_dir, 'master', master, options=['--stream']) - - replica = self.make_simple_node(base_dir="{0}/{1}/replica".format(module_name, fname)) - replica.cleanup() - - self.restore_node(backup_dir, 'master', replica) - - self.add_instance(backup_dir, 'replica', replica) - self.set_replica(master, replica, 'replica', synchronous=True) - self.set_archiving(backup_dir, 'replica', replica, replica=True) - replica.start() - - # Create table and indexes - master.safe_psql( - "postgres", - "create sequence t_seq; create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") - for i in idx_ptrack: - if idx_ptrack[i]['type'] != 'heap' and idx_ptrack[i]['type'] != 'seq': - master.safe_psql("postgres", "create index {0} on {1} using {2}({3})".format( - i, idx_ptrack[i]['relation'], idx_ptrack[i]['type'], idx_ptrack[i]['column'])) - - master.safe_psql('postgres', 'vacuum t_heap') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get size of heap and indexes. size calculated in pages - idx_ptrack[i]['old_size'] = self.get_fork_size(replica, i) - # get path to heap and index files - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate md5sums of pages - idx_ptrack[i]['old_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['old_size']) - - # Take PTRACK backup to clean every ptrack - self.backup_node(backup_dir, 'replica', replica, options=['-j10', - '--master-host=localhost', '--master-db=postgres', '--master-port={0}'.format(master.port)]) - - master.safe_psql('postgres', 'delete from t_heap where id > 128;') - master.safe_psql('postgres', 'vacuum t_heap') - master.safe_psql('postgres', 'checkpoint') - - for i in idx_ptrack: - # get new size of heap and indexes. 
size calculated in pages - idx_ptrack[i]['new_size'] = self.get_fork_size(replica, i) - # update path to heap and index files in case they`ve changed - idx_ptrack[i]['path'] = self.get_fork_path(replica, i) - # calculate new md5sums for pages - idx_ptrack[i]['new_pages'] = self.get_md5_per_page_for_fork( - idx_ptrack[i]['path'], idx_ptrack[i]['new_size']) - # get ptrack for every idx - idx_ptrack[i]['ptrack'] = self.get_ptrack_bits_per_page_for_fork( - replica, idx_ptrack[i]['path'], [idx_ptrack[i]['old_size'], idx_ptrack[i]['new_size']]) - - # compare pages and check ptrack sanity - self.check_ptrack_sanity(idx_ptrack[i]) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/remote.py b/tests/remote.py new file mode 100644 index 000000000..4d46447f0 --- /dev/null +++ b/tests/remote.py @@ -0,0 +1,50 @@ +import unittest +import os +from time import sleep +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from .helpers.cfs_helpers import find_by_name + + +module_name = 'remote' + + +class RemoteTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_remote_sanity(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + output = self.backup_node( + backup_dir, 'node', node, + options=['--stream'], no_remote=True, return_id=False) + self.assertIn('remote: false', output) + + # try: + # self.backup_node( + # backup_dir, 'node', + # node, options=['--remote-proto=ssh', '--stream'], no_remote=True) + # # we should die here because exception is what we expect to happen + # self.assertEqual( + # 1, 0, + # "Expecting Error because remote-host option is missing." 
+ # "\n Output: {0} \n CMD: {1}".format( + # repr(self.output), self.cmd)) + # except ProbackupException as e: + # self.assertIn( + # "Insert correct error", + # e.message, + # "\n Unexpected Error Message: {0}\n CMD: {1}".format( + # repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/replica.py b/tests/replica.py index d74c375c2..de00c195c 100644 --- a/tests/replica.py +++ b/tests/replica.py @@ -3,13 +3,14 @@ from .helpers.ptrack_helpers import ProbackupTest, ProbackupException, idx_ptrack from datetime import datetime, timedelta import subprocess -from sys import exit import time +from distutils.dir_util import copy_tree +from testgres import ProcessType +from time import sleep module_name = 'replica' - class ReplicaTest(ProbackupTest, unittest.TestCase): # @unittest.skip("skip") @@ -19,20 +20,31 @@ def test_replica_stream_ptrack_backup(self): make node, take full backup, restore it and make replica from it, take full stream backup from replica """ + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + if self.pg_config_version > self.version_to_num('9.6.0'): + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') master = self.make_simple_node( - base_dir="{0}/{1}/master".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'master'), set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', 'max_wal_senders': '2', - 'checkpoint_timeout': '30s', 'ptrack_enable': 'on'} - ) - master.start() + ptrack_enable=True, + initdb_params=['--data-checksums']) + self.init_pb(backup_dir) self.add_instance(backup_dir, 'master', master) + master.slow_start() + + if master.major_version >= 12: + master.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + # CREATE TABLE master.psql( "postgres", @@ -44,7 +56,7 @@ def test_replica_stream_ptrack_backup(self): # take full backup and restore it self.backup_node(backup_dir, 'master', master, options=['--stream']) replica = self.make_simple_node( - base_dir="{0}/{1}/replica".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'replica')) replica.cleanup() self.restore_node(backup_dir, 'master', replica) self.set_replica(master, replica) @@ -64,6 +76,7 @@ def test_replica_stream_ptrack_backup(self): "from generate_series(256,512) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") self.add_instance(backup_dir, 'replica', replica) + backup_id = self.backup_node( backup_dir, 'replica', replica, options=[ @@ -77,12 +90,14 @@ def test_replica_stream_ptrack_backup(self): # RESTORE FULL BACKUP TAKEN FROM PREVIOUS STEP node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'node')) node.cleanup() self.restore_node(backup_dir, 'replica', data_dir=node.data_dir) - node.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node.port)) + + self.set_auto_conf(node, {'port': node.port}) + node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) @@ -95,7 +110,9 @@ def test_replica_stream_ptrack_backup(self): "insert into t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " "from generate_series(512,768) i") + before = 
master.safe_psql("postgres", "SELECT * FROM t_heap") + backup_id = self.backup_node( backup_dir, 'replica', replica, backup_type='ptrack', options=[ @@ -111,9 +128,11 @@ def test_replica_stream_ptrack_backup(self): node.cleanup() self.restore_node( backup_dir, 'replica', data_dir=node.data_dir, backup_id=backup_id) - node.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node.port)) + + self.set_auto_conf(node, {'port': node.port}) + node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) @@ -130,23 +149,26 @@ def test_replica_archive_page_backup(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') master = self.make_simple_node( - base_dir="{0}/{1}/master".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'master'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', - 'max_wal_senders': '2', - 'checkpoint_timeout': '30s'} - ) + 'archive_timeout': '10s', + 'checkpoint_timeout': '30s', + 'max_wal_size': '32MB'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + self.init_pb(backup_dir) self.add_instance(backup_dir, 'master', master) self.set_archiving(backup_dir, 'master', master) - # force more frequent wal switch - master.append_conf('postgresql.auto.conf', 'archive_timeout = 10') master.slow_start() replica = self.make_simple_node( - base_dir="{0}/{1}/replica".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'replica')) replica.cleanup() self.backup_node(backup_dir, 'master', master) @@ -155,7 +177,7 @@ def test_replica_archive_page_backup(self): "postgres", "create table t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,256) i") + "from generate_series(0,2560) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") @@ -164,8 +186,10 @@ def test_replica_archive_page_backup(self): self.restore_node(backup_dir, 'master', replica) # Settings for Replica - self.set_replica(master, replica) + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, synchronous=True) self.set_archiving(backup_dir, 'replica', replica, replica=True) + replica.slow_start(replica=True) # Check data correctness on replica @@ -179,68 +203,89 @@ def test_replica_archive_page_backup(self): "postgres", "insert into t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(256,512) i") + "from generate_series(256,25120) i") + before = master.safe_psql("postgres", "SELECT * FROM t_heap") - self.add_instance(backup_dir, 'replica', replica) + + self.wait_until_replica_catch_with_master(master, replica) + backup_id = self.backup_node( backup_dir, 'replica', replica, options=[ - '--archive-timeout=300', + '--archive-timeout=60', '--master-host=localhost', '--master-db=postgres', '--master-port={0}'.format(master.port)]) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) # RESTORE FULL BACKUP TAKEN FROM replica node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'node')) node.cleanup() self.restore_node(backup_dir, 'replica', 
data_dir=node.data_dir) - node.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node.port)) + + self.set_auto_conf(node, {'port': node.port, 'archive_mode': 'off'}) + node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) + node.cleanup() # Change data on master, make PAGE backup from replica, # restore taken backup and check that restored data equal # to original data - master.psql( - "postgres", - "insert into t_heap as select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(512,768) i") - before = master.safe_psql("postgres", "SELECT * FROM t_heap") + master.pgbench_init(scale=5) + + pgbench = master.pgbench( + options=['-T', '30', '-c', '2', '--no-vacuum']) + backup_id = self.backup_node( - backup_dir, 'replica', replica, backup_type='page', + backup_dir, 'replica', + replica, backup_type='page', options=[ - '--archive-timeout=300', + '--archive-timeout=60', '--master-host=localhost', '--master-db=postgres', '--master-port={0}'.format(master.port)]) + + pgbench.wait() + + self.switch_wal_segment(master) + + before = master.safe_psql("postgres", "SELECT * FROM pgbench_accounts") + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) # RESTORE PAGE BACKUP TAKEN FROM replica - node.cleanup() self.restore_node( - backup_dir, 'replica', data_dir=node.data_dir, backup_id=backup_id) - node.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node.port)) + backup_dir, 'replica', data_dir=node.data_dir, + backup_id=backup_id) + + self.set_auto_conf(node, {'port': node.port, 'archive_mode': 'off'}) + node.slow_start() + # CHECK DATA CORRECTNESS - after = node.safe_psql("postgres", "SELECT * FROM t_heap") - self.assertEqual(before, after) + after = node.safe_psql("postgres", "SELECT * FROM pgbench_accounts") + self.assertEqual( + before, after, 'Restored data is not equal to original') + + self.add_instance(backup_dir, 'node', node) + self.backup_node( + backup_dir, 'node', node, options=['--stream']) # Clean after yourself self.del_test_dir(module_name, fname) # @unittest.skip("skip") - def test_make_replica_via_restore(self): + def test_basic_make_replica_via_restore(self): """ make archive master, take full and page archive backups from master, set replica, make archive backup from replica @@ -248,22 +293,24 @@ def test_make_replica_via_restore(self): fname = self.id().split('.')[3] backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') master = self.make_simple_node( - base_dir="{0}/{1}/master".format(module_name, fname), + base_dir=os.path.join(module_name, fname, 'master'), set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'wal_level': 'replica', 'max_wal_senders': '2', - 'checkpoint_timeout': '30s'} - ) + 'archive_timeout': '10s'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + self.init_pb(backup_dir) self.add_instance(backup_dir, 'master', master) self.set_archiving(backup_dir, 'master', master) - # force more frequent wal switch - master.append_conf('postgresql.auto.conf', 'archive_timeout = 10') master.slow_start() replica = self.make_simple_node( - base_dir="{0}/{1}/replica".format(module_name, fname)) + base_dir=os.path.join(module_name, fname, 'replica')) replica.cleanup() 
self.backup_node(backup_dir, 'master', master) @@ -272,22 +319,1256 @@ def test_make_replica_via_restore(self): "postgres", "create table t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,256) i") + "from generate_series(0,8192) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") backup_id = self.backup_node( backup_dir, 'master', master, backup_type='page') self.restore_node( - backup_dir, 'master', replica, - options=['-R', '--recovery-target-action=promote']) + backup_dir, 'master', replica, options=['-R']) + + # Settings for Replica + self.add_instance(backup_dir, 'replica', replica) + self.set_archiving(backup_dir, 'replica', replica, replica=True) + self.set_replica(master, replica, synchronous=True) + + replica.slow_start(replica=True) + + self.backup_node( + backup_dir, 'replica', replica, + options=['--archive-timeout=30s', '--stream']) + + # Clean after yourself + self.del_test_dir(module_name, fname, [master, replica]) + + # @unittest.skip("skip") + def test_take_backup_from_delayed_replica(self): + """ + make archive master, take full backups from master, + restore full backup as delayed replica, launch pgbench, + take FULL, PAGE and DELTA backups from replica + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'archive_timeout': '10s'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + self.set_archiving(backup_dir, 'master', master) + master.slow_start() + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.backup_node(backup_dir, 'master', master) + + master.psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,165000) i") + + master.psql( + "postgres", + "create table t_heap_1 as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,165000) i") + + self.restore_node( + backup_dir, 'master', replica, options=['-R']) + + # Settings for Replica + self.add_instance(backup_dir, 'replica', replica) + self.set_archiving(backup_dir, 'replica', replica, replica=True) + + self.set_auto_conf(replica, {'port': replica.port}) + + replica.slow_start(replica=True) + + self.wait_until_replica_catch_with_master(master, replica) + + if self.get_version(master) >= self.version_to_num('12.0'): + self.set_auto_conf( + replica, {'recovery_min_apply_delay': '300s'}) + else: + replica.append_conf( + 'recovery.conf', + 'recovery_min_apply_delay = 300s') + + replica.stop() + replica.slow_start(replica=True) + + master.pgbench_init(scale=10) + + pgbench = master.pgbench( + options=['-T', '60', '-c', '2', '--no-vacuum']) + + self.backup_node( + backup_dir, 'replica', + replica, options=['--archive-timeout=60s']) + + self.backup_node( + backup_dir, 'replica', replica, + data_dir=replica.data_dir, + backup_type='page', options=['--archive-timeout=60s']) + + sleep(1) + + self.backup_node( + backup_dir, 'replica', replica, + 
backup_type='delta', options=['--archive-timeout=60s']) + + pgbench.wait() + + pgbench = master.pgbench( + options=['-T', '30', '-c', '2', '--no-vacuum']) + + self.backup_node( + backup_dir, 'replica', replica, + options=['--stream']) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='page', options=['--stream']) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='delta', options=['--stream']) + + pgbench.wait() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_replica_promote(self): + """ + start backup from replica, during backup promote replica + check that backup is failed + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'archive_timeout': '10s', + 'checkpoint_timeout': '30s', + 'max_wal_size': '32MB'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + self.set_archiving(backup_dir, 'master', master) + master.slow_start() + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + self.backup_node(backup_dir, 'master', master) + + master.psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,165000) i") + + self.restore_node( + backup_dir, 'master', replica, options=['-R']) # Settings for Replica - # self.set_replica(master, replica) + self.add_instance(backup_dir, 'replica', replica) self.set_archiving(backup_dir, 'replica', replica, replica=True) - replica.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(replica.port)) - replica.start() + self.set_replica( + master, replica, replica_name='replica', synchronous=True) + + replica.slow_start(replica=True) + + master.psql( + "postgres", + "create table t_heap_1 as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,165000) i") + + self.wait_until_replica_catch_with_master(master, replica) + + # start backup from replica + gdb = self.backup_node( + backup_dir, 'replica', replica, gdb=True, + options=['--log-level-file=verbose']) + + gdb.set_breakpoint('backup_data_file') + gdb.run_until_break() + gdb.continue_execution_until_break(20) + + replica.promote() + + gdb.remove_all_breakpoints() + gdb.continue_execution_until_exit() + + backup_id = self.show_pb( + backup_dir, 'replica')[0]["id"] + + # read log file content + with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: + log_content = f.read() + f.close + + self.assertIn( + 'ERROR: the standby was promoted during online backup', + log_content) + + self.assertIn( + 'WARNING: Backup {0} is running, ' + 'setting its status to ERROR'.format(backup_id), + log_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_replica_stop_lsn_null_offset(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, 
fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '1h', + 'wal_level': 'replica'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', master) + self.set_archiving(backup_dir, 'node', master) + master.slow_start() + + # freeze bgwriter to get rid of RUNNING XACTS records + bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0] + gdb_checkpointer = self.gdb_attach(bgwriter_pid) + + self.backup_node(backup_dir, 'node', master) + + # Create replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + self.restore_node(backup_dir, 'node', replica) + + # Settings for Replica + self.set_replica(master, replica, synchronous=True) + self.set_archiving(backup_dir, 'node', replica, replica=True) + + replica.slow_start(replica=True) + + self.switch_wal_segment(master) + self.switch_wal_segment(master) + + output = self.backup_node( + backup_dir, 'node', replica, replica.data_dir, + options=[ + '--archive-timeout=30', + '--log-level-console=LOG', + '--no-validate', + '--stream'], + return_id=False) + + self.assertIn( + 'LOG: Invalid offset in stop_lsn value 0/4000000', + output) + + self.assertIn( + 'WARNING: WAL segment 000000010000000000000004 could not be streamed in 30 seconds', + output) + + self.assertIn( + 'WARNING: Failed to get next WAL record after 0/4000000, looking for previous WAL record', + output) + + self.assertIn( + 'LOG: Looking for LSN 0/4000000 in segment: 000000010000000000000003', + output) + + self.assertIn( + 'has endpoint 0/4000000 which is ' + 'equal or greater than requested LSN 0/4000000', + output) + + self.assertIn( + 'LOG: Found prior LSN:', + output) # Clean after yourself self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_replica_stop_lsn_null_offset_next_record(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '1h', + 'wal_level': 'replica'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + self.set_archiving(backup_dir, 'master', master) + master.slow_start() + + # freeze bgwriter to get rid of RUNNING XACTS records + bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0] + gdb_checkpointer = self.gdb_attach(bgwriter_pid) + + self.backup_node(backup_dir, 'master', master) + + # Create replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + self.restore_node(backup_dir, 'master', replica) + + # Settings for Replica + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, synchronous=True) + self.set_archiving(backup_dir, 'replica', replica, replica=True) + + copy_tree( + os.path.join(backup_dir, 'wal', 'master'), + os.path.join(backup_dir, 'wal', 'replica')) + + replica.slow_start(replica=True) + + 
self.switch_wal_segment(master) + self.switch_wal_segment(master) + + # open connection to master + conn = master.connect() + + gdb = self.backup_node( + backup_dir, 'replica', replica, + options=[ + '--archive-timeout=40', + '--log-level-file=LOG', + '--no-validate', + '--stream'], + gdb=True) + + gdb.set_breakpoint('pg_stop_backup') + gdb.run_until_break() + gdb.remove_all_breakpoints() + gdb.continue_execution_until_running() + + sleep(5) + + conn.execute("create table t1()") + conn.commit() + + while 'RUNNING' in self.show_pb(backup_dir, 'replica')[0]['status']: + sleep(5) + + file = os.path.join(backup_dir, 'log', 'pg_probackup.log') + + with open(file) as f: + log_content = f.read() + + self.assertIn( + 'LOG: Invalid offset in stop_lsn value 0/4000000', + log_content) + + self.assertIn( + 'LOG: Looking for segment: 000000010000000000000004', + log_content) + + self.assertIn( + 'LOG: First record in WAL segment "000000010000000000000004": 0/4000028', + log_content) + + self.assertIn( + 'LOG: stop_lsn: 0/4000000', + log_content) + + self.assertTrue(self.show_pb(backup_dir, 'replica')[0]['status'] == 'DONE') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_archive_replica_null_offset(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '1h', + 'wal_level': 'replica'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', master) + self.set_archiving(backup_dir, 'node', master) + master.slow_start() + + self.backup_node(backup_dir, 'node', master) + + # Create replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + self.restore_node(backup_dir, 'node', replica) + + # Settings for Replica + self.set_replica(master, replica, synchronous=True) + self.set_archiving(backup_dir, 'node', replica, replica=True) + + # freeze bgwriter to get rid of RUNNING XACTS records + bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0] + gdb_checkpointer = self.gdb_attach(bgwriter_pid) + + replica.slow_start(replica=True) + + self.switch_wal_segment(master) + self.switch_wal_segment(master) + + # take backup from replica + output = self.backup_node( + backup_dir, 'node', replica, replica.data_dir, + options=[ + '--archive-timeout=30', + '--log-level-console=LOG', + '--no-validate'], + return_id=False) + + self.assertIn( + 'LOG: Invalid offset in stop_lsn value 0/4000000', + output) + + self.assertIn( + 'WARNING: WAL segment 000000010000000000000004 could not be archived in 30 seconds', + output) + + self.assertIn( + 'WARNING: Failed to get next WAL record after 0/4000000, looking for previous WAL record', + output) + + self.assertIn( + 'LOG: Looking for LSN 0/4000000 in segment: 000000010000000000000003', + output) + + self.assertIn( + 'has endpoint 0/4000000 which is ' + 'equal or greater than requested LSN 0/4000000', + output) + + self.assertIn( + 'LOG: Found prior LSN:', + output) + + print(output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def 
test_archive_replica_not_null_offset(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '1h', + 'wal_level': 'replica'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', master) + self.set_archiving(backup_dir, 'node', master) + master.slow_start() + + self.backup_node(backup_dir, 'node', master) + + # Create replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + self.restore_node(backup_dir, 'node', replica) + + # Settings for Replica + self.set_replica(master, replica, synchronous=True) + self.set_archiving(backup_dir, 'node', replica, replica=True) + + replica.slow_start(replica=True) + + # take backup from replica + self.backup_node( + backup_dir, 'node', replica, replica.data_dir, + options=[ + '--archive-timeout=30', + '--log-level-console=LOG', + '--no-validate'], + return_id=False) + + try: + self.backup_node( + backup_dir, 'node', replica, replica.data_dir, + options=[ + '--archive-timeout=30', + '--log-level-console=LOG', + '--no-validate']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of archive timeout. " + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'LOG: Looking for LSN 0/4000060 in segment: 000000010000000000000004', + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertIn( + 'INFO: Wait for LSN 0/4000060 in archived WAL segment', + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertIn( + 'ERROR: WAL segment 000000010000000000000004 could not be archived in 30 seconds', + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_replica_toast(self): + """ + make archive master, take full and page archive backups from master, + set replica, make archive backup from replica + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'checkpoint_timeout': '1h', + 'wal_level': 'replica', + 'shared_buffers': '128MB'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + self.set_archiving(backup_dir, 'master', master) + master.slow_start() + + # freeze bgwriter to get rid of RUNNING XACTS records + bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0] + gdb_checkpointer = self.gdb_attach(bgwriter_pid) + + self.backup_node(backup_dir, 'master', master) + 
+ # Create replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + self.restore_node(backup_dir, 'master', replica) + + # Settings for Replica + self.add_instance(backup_dir, 'replica', replica) + self.set_replica(master, replica, synchronous=True) + self.set_archiving(backup_dir, 'replica', replica, replica=True) + + copy_tree( + os.path.join(backup_dir, 'wal', 'master'), + os.path.join(backup_dir, 'wal', 'replica')) + + replica.slow_start(replica=True) + + self.switch_wal_segment(master) + self.switch_wal_segment(master) + + master.safe_psql( + 'postgres', + 'CREATE TABLE t1 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,10) i') + + self.wait_until_replica_catch_with_master(master, replica) + + output = self.backup_node( + backup_dir, 'replica', replica, + options=[ + '--archive-timeout=30', + '--log-level-console=LOG', + '--no-validate', + '--stream'], + return_id=False) + + pgdata = self.pgdata_content(replica.data_dir) + + self.assertIn( + 'WARNING: Could not read WAL record at', + output) + + self.assertIn( + 'LOG: Found prior LSN:', + output) + + res1 = replica.safe_psql( + 'postgres', + 'select md5(fat_attr) from t1') + + replica.cleanup() + + self.restore_node(backup_dir, 'replica', replica) + pgdata_restored = self.pgdata_content(replica.data_dir) + + replica.slow_start() + + res2 = replica.safe_psql( + 'postgres', + 'select md5(fat_attr) from t1') + + self.assertEqual(res1, res2) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_replica_promote_1(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '1h', + 'wal_level': 'replica'}) + + if self.get_version(master) < self.version_to_num('9.6.0'): + self.del_test_dir(module_name, fname) + return unittest.skip( + 'Skipped because backup from replica is not supported in PG 9.5') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + # set replica True, so archive_mode 'always' is used. 
+ self.set_archiving(backup_dir, 'master', master, replica=True) + master.slow_start() + + self.backup_node(backup_dir, 'master', master) + + # Create replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + self.restore_node(backup_dir, 'master', replica) + + # Settings for Replica + self.set_replica(master, replica) + + replica.slow_start(replica=True) + + master.safe_psql( + 'postgres', + 'CREATE TABLE t1 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,10) i') + + self.wait_until_replica_catch_with_master(master, replica) + + wal_file = os.path.join( + backup_dir, 'wal', 'master', '000000010000000000000004') + + wal_file_partial = os.path.join( + backup_dir, 'wal', 'master', '000000010000000000000004.partial') + + self.assertFalse(os.path.exists(wal_file)) + + replica.promote() + + while not os.path.exists(wal_file_partial): + sleep(1) + + self.switch_wal_segment(master) + + # sleep to be sure, that any partial timeout is expired + sleep(70) + + self.assertTrue( + os.path.exists(wal_file_partial), + "File {0} disappeared".format(wal_file)) + + self.assertTrue( + os.path.exists(wal_file_partial), + "File {0} disappeared".format(wal_file_partial)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_replica_promote_2(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + # set replica True, so archive_mode 'always' is used. 
+ self.set_archiving( + backup_dir, 'master', master, replica=True) + master.slow_start() + + self.backup_node(backup_dir, 'master', master) + + # Create replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + self.restore_node(backup_dir, 'master', replica) + + # Settings for Replica + self.set_replica(master, replica) + self.set_auto_conf(replica, {'port': replica.port}) + + replica.slow_start(replica=True) + + master.safe_psql( + 'postgres', + 'CREATE TABLE t1 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,1) i') + + self.wait_until_replica_catch_with_master(master, replica) + + replica.promote() + + self.backup_node( + backup_dir, 'master', replica, data_dir=replica.data_dir, + backup_type='page') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_replica_promote_3(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + + master.slow_start() + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + + # Create replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + self.restore_node(backup_dir, 'master', replica) + + # Settings for Replica + self.set_replica(master, replica) + self.set_auto_conf(replica, {'port': replica.port}) + + replica.slow_start(replica=True) + + master.safe_psql( + 'postgres', + 'CREATE TABLE t1 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,20) i') + self.wait_until_replica_catch_with_master(master, replica) + + self.add_instance(backup_dir, 'replica', replica) + + full_id = self.backup_node( + backup_dir, 'replica', + replica, options=['--stream']) + + master.safe_psql( + 'postgres', + 'CREATE TABLE t2 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,20) i') + self.wait_until_replica_catch_with_master(master, replica) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='delta', options=['--stream']) + + replica.promote() + + # failing, because without archving, it is impossible to + # take multi-timeline backup. 
+ try: + self.backup_node( + backup_dir, 'replica', replica, + backup_type='delta', options=['--stream']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of timeline switch " + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'WARNING: Cannot find valid backup on previous timelines, ' + 'WAL archive is not available' in e.message and + 'ERROR: Create new full backup before an incremental one' in e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_replica_promote_archive_delta(self): + """ + t3 /---D3--> + t2 /-------> + t1 --F---D1--D2-- + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node1'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '30s', + 'archive_timeout': '30s', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node1) + self.set_config( + backup_dir, 'node', options=['--archive-timeout=60s']) + self.set_archiving(backup_dir, 'node', node1) + + node1.slow_start() + + self.backup_node(backup_dir, 'node', node1, options=['--stream']) + + # Create replica + node2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node2')) + node2.cleanup() + self.restore_node(backup_dir, 'node', node2, node2.data_dir) + + # Settings for Replica + self.set_replica(node1, node2) + self.set_auto_conf(node2, {'port': node2.port}) + self.set_archiving(backup_dir, 'node', node2, replica=True) + + node2.slow_start(replica=True) + + node1.safe_psql( + 'postgres', + 'CREATE TABLE t1 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,20) i') + self.wait_until_replica_catch_with_master(node1, node2) + + node1.safe_psql( + 'postgres', + 'CREATE TABLE t2 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,20) i') + self.wait_until_replica_catch_with_master(node1, node2) + + # delta backup on replica on timeline 1 + delta1_id = self.backup_node( + backup_dir, 'node', node2, node2.data_dir, + 'delta', options=['--stream']) + + # delta backup on replica on timeline 1 + delta2_id = self.backup_node( + backup_dir, 'node', node2, node2.data_dir, 'delta') + + self.change_backup_status( + backup_dir, 'node', delta2_id, 'ERROR') + + # node2 is now master + node2.promote() + + node2.safe_psql( + 'postgres', + 'CREATE TABLE t3 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,20) i') + + # node1 is now replica + node1.cleanup() + # kludge "backup_id=delta1_id" + self.restore_node( + backup_dir, 'node', node1, node1.data_dir, + backup_id=delta1_id, + options=[ + '--recovery-target-timeline=2', + '--recovery-target=latest']) + + # Settings for Replica + self.set_replica(node2, node1) + self.set_auto_conf(node1, {'port': node1.port}) + self.set_archiving(backup_dir, 'node', node1, replica=True) + + node1.slow_start(replica=True) + + node2.safe_psql( + 'postgres', + 'CREATE TABLE t4 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,30) i') + self.wait_until_replica_catch_with_master(node2, node1) + + # node1 is back to be a master + 
node1.promote() + + sleep(5) + + # delta backup on timeline 3 + self.backup_node( + backup_dir, 'node', node1, node1.data_dir, 'delta', + options=['--archive-timeout=60']) + + pgdata = self.pgdata_content(node1.data_dir) + + node1.cleanup() + self.restore_node(backup_dir, 'node', node1, node1.data_dir) + + pgdata_restored = self.pgdata_content(node1.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_replica_promote_archive_page(self): + """ + t3 /---P3--> + t2 /-------> + t1 --F---P1--P2-- + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node1'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '30s', + 'archive_timeout': '30s', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node1) + self.set_archiving(backup_dir, 'node', node1) + self.set_config( + backup_dir, 'node', options=['--archive-timeout=60s']) + + node1.slow_start() + + self.backup_node(backup_dir, 'node', node1, options=['--stream']) + + # Create replica + node2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node2')) + node2.cleanup() + self.restore_node(backup_dir, 'node', node2, node2.data_dir) + + # Settings for Replica + self.set_replica(node1, node2) + self.set_auto_conf(node2, {'port': node2.port}) + self.set_archiving(backup_dir, 'node', node2, replica=True) + + node2.slow_start(replica=True) + + node1.safe_psql( + 'postgres', + 'CREATE TABLE t1 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,20) i') + self.wait_until_replica_catch_with_master(node1, node2) + + node1.safe_psql( + 'postgres', + 'CREATE TABLE t2 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,20) i') + self.wait_until_replica_catch_with_master(node1, node2) + + # page backup on replica on timeline 1 + page1_id = self.backup_node( + backup_dir, 'node', node2, node2.data_dir, + 'page', options=['--stream']) + + # page backup on replica on timeline 1 + page2_id = self.backup_node( + backup_dir, 'node', node2, node2.data_dir, 'page') + + self.change_backup_status( + backup_dir, 'node', page2_id, 'ERROR') + + # node2 is now master + node2.promote() + + node2.safe_psql( + 'postgres', + 'CREATE TABLE t3 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,20) i') + + # node1 is now replica + node1.cleanup() + # kludge "backup_id=page1_id" + self.restore_node( + backup_dir, 'node', node1, node1.data_dir, + backup_id=page1_id, + options=[ + '--recovery-target-timeline=2', + '--recovery-target=latest']) + + # Settings for Replica + self.set_replica(node2, node1) + self.set_auto_conf(node1, {'port': node1.port}) + self.set_archiving(backup_dir, 'node', node1, replica=True) + + node1.slow_start(replica=True) + + node2.safe_psql( + 'postgres', + 'CREATE TABLE t4 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,30) i') + self.wait_until_replica_catch_with_master(node2, node1) + + # node1 is back to be a master + node1.promote() + self.switch_wal_segment(node1) + + sleep(5) + + # delta3_id = self.backup_node( + # backup_dir, 'node', node2, node2.data_dir, 'delta') + # page backup on timeline 3 + page3_id = self.backup_node( + backup_dir, 'node', node1, 
node1.data_dir, 'page', + options=['--archive-timeout=60']) + + pgdata = self.pgdata_content(node1.data_dir) + + node1.cleanup() + self.restore_node(backup_dir, 'node', node1, node1.data_dir) + + pgdata_restored = self.pgdata_content(node1.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_parent_choosing(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'master'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + + master.slow_start() + + self.backup_node(backup_dir, 'master', master, options=['--stream']) + + # Create replica + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + self.restore_node(backup_dir, 'master', replica) + + # Settings for Replica + self.set_replica(master, replica) + self.set_auto_conf(replica, {'port': replica.port}) + + replica.slow_start(replica=True) + + master.safe_psql( + 'postgres', + 'CREATE TABLE t1 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,20) i') + self.wait_until_replica_catch_with_master(master, replica) + + self.add_instance(backup_dir, 'replica', replica) + + full_id = self.backup_node( + backup_dir, 'replica', + replica, options=['--stream']) + + master.safe_psql( + 'postgres', + 'CREATE TABLE t2 AS ' + 'SELECT i, repeat(md5(i::text),5006056) AS fat_attr ' + 'FROM generate_series(0,20) i') + self.wait_until_replica_catch_with_master(master, replica) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='delta', options=['--stream']) + + replica.promote() + + # failing, because without archving, it is impossible to + # take multi-timeline backup. 
+ try: + self.backup_node( + backup_dir, 'replica', replica, + backup_type='delta', options=['--stream']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of timeline switch " + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'WARNING: Cannot find valid backup on previous timelines, ' + 'WAL archive is not available' in e.message and + 'ERROR: Create new full backup before an incremental one' in e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_instance_from_the_past(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + + node.slow_start() + + full_id = self.backup_node(backup_dir, 'node', node, options=['--stream']) + + node.pgbench_init(scale=10) + self.backup_node(backup_dir, 'node', node, options=['--stream']) + node.cleanup() + + self.restore_node(backup_dir, 'node', node, backup_id=full_id) + node.slow_start() + + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--stream']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because instance is from the past " + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: Current START LSN' in e.message and + 'is lower than START LSN' in e.message and + 'It may indicate that we are trying to backup ' + 'PostgreSQL instance from the past' in e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + +# TODO: +# null offset STOP LSN and latest record in previous segment is conrecord (manual only) +# archiving from promoted delayed replica diff --git a/tests/restore.py b/tests/restore.py new file mode 100644 index 000000000..9c105175e --- /dev/null +++ b/tests/restore.py @@ -0,0 +1,3411 @@ +import os +import unittest +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +import subprocess +from datetime import datetime +import sys +from time import sleep +from datetime import datetime, timedelta +import hashlib +import shutil +import json +from testgres import QueryException + + +module_name = 'restore' + + +class RestoreTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_restore_full_to_latest(self): + """recovery to latest from full backup""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=2) + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + before = node.execute("postgres", "SELECT 
* FROM pgbench_branches") + backup_id = self.backup_node(backup_dir, 'node', node) + + node.stop() + node.cleanup() + + # 1 - Test recovery from latest + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + # 2 - Test that recovery.conf was created + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_conf = os.path.join(node.data_dir, 'probackup_recovery.conf') + else: + recovery_conf = os.path.join(node.data_dir, 'recovery.conf') + self.assertEqual(os.path.isfile(recovery_conf), True) + + node.slow_start() + + after = node.execute("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_full_page_to_latest(self): + """recovery to latest from full + page backups""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=2) + + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="page") + + before = node.execute("postgres", "SELECT * FROM pgbench_branches") + + node.stop() + node.cleanup() + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + + after = node.execute("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_to_specific_timeline(self): + """recovery to target timeline""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=2) + + before = node.execute("postgres", "SELECT * FROM pgbench_branches") + + backup_id = self.backup_node(backup_dir, 'node', node) + + target_tli = int( + node.get_control_data()["Latest checkpoint's TimeLineID"]) + node.stop() + node.cleanup() + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + options=['-T', '10', '-c', '2', '--no-vacuum']) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', node) + + node.stop() + node.cleanup() + + # Correct Backup must be choosen for restore + self.assertIn( + 
"INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--timeline={0}".format(target_tli)] + ), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + recovery_target_timeline = self.get_recovery_conf( + node)["recovery_target_timeline"] + self.assertEqual(int(recovery_target_timeline), target_tli) + + node.slow_start() + after = node.execute("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_to_time(self): + """recovery to target time""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'TimeZone': 'Europe/Moscow'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=2) + before = node.execute("postgres", "SELECT * FROM pgbench_branches") + + backup_id = self.backup_node(backup_dir, 'node', node) + + target_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + node.stop() + node.cleanup() + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", '--time={0}'.format(target_time), + "--recovery-target-action=promote" + ] + ), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + after = node.execute("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_to_xid_inclusive(self): + """recovery to target xid""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=2) + with node.connect("postgres") as con: + con.execute("CREATE TABLE tbl0005 (a text)") + con.commit() + + backup_id = self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + before = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") + with node.connect("postgres") as con: + res = con.execute("INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") + con.commit() + target_xid = res[0][0] + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + node.stop() + node.cleanup() + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", '--xid={0}'.format(target_xid), + "--recovery-target-action=promote"] + ), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + after = 
node.safe_psql("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + self.assertEqual( + len(node.execute("postgres", "SELECT * FROM tbl0005")), 1) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_to_xid_not_inclusive(self): + """recovery with target inclusive false""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=2) + with node.connect("postgres") as con: + con.execute("CREATE TABLE tbl0005 (a text)") + con.commit() + + backup_id = self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + before = node.execute("postgres", "SELECT * FROM pgbench_branches") + with node.connect("postgres") as con: + result = con.execute("INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") + con.commit() + target_xid = result[0][0] + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + node.stop() + node.cleanup() + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", + '--xid={0}'.format(target_xid), + "--inclusive=false", + "--recovery-target-action=promote"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + after = node.execute("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + self.assertEqual( + len(node.execute("postgres", "SELECT * FROM tbl0005")), 0) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_to_lsn_inclusive(self): + """recovery to target lsn""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + if self.get_version(node) < self.version_to_num('10.0'): + self.del_test_dir(module_name, fname) + return + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=2) + with node.connect("postgres") as con: + con.execute("CREATE TABLE tbl0005 (a int)") + con.commit() + + backup_id = self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + before = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") + with node.connect("postgres") as con: + con.execute("INSERT INTO tbl0005 VALUES (1)") + con.commit() + res = con.execute("SELECT pg_current_wal_lsn()") + con.commit() + con.execute("INSERT INTO tbl0005 VALUES (2)") + con.commit() + xlogid, xrecoff = res[0][0].split('/') + xrecoff = hex(int(xrecoff, 16) + 1)[2:] + target_lsn = "{0}/{1}".format(xlogid, xrecoff) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + node.stop() + node.cleanup() + + 
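+        # The hex values here are illustrative: target_lsn is
+        # pg_current_wal_lsn() sampled after the first INSERT, advanced by one
+        # byte (e.g. '0/3000060' -> '0/3000061').  --inclusive is left at its
+        # default (true), so the restore below is expected to keep both rows
+        # of tbl0005; the companion test_restore_to_lsn_not_inclusive passes
+        # --inclusive=false and expects only the first row to survive.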
self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", '--lsn={0}'.format(target_lsn), + "--recovery-target-action=promote"] + ), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + + after = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + self.assertEqual( + len(node.execute("postgres", "SELECT * FROM tbl0005")), 2) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_to_lsn_not_inclusive(self): + """recovery to target lsn""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + if self.get_version(node) < self.version_to_num('10.0'): + self.del_test_dir(module_name, fname) + return + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=2) + with node.connect("postgres") as con: + con.execute("CREATE TABLE tbl0005 (a int)") + con.commit() + + backup_id = self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + before = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") + with node.connect("postgres") as con: + con.execute("INSERT INTO tbl0005 VALUES (1)") + con.commit() + res = con.execute("SELECT pg_current_wal_lsn()") + con.commit() + con.execute("INSERT INTO tbl0005 VALUES (2)") + con.commit() + xlogid, xrecoff = res[0][0].split('/') + xrecoff = hex(int(xrecoff, 16) + 1)[2:] + target_lsn = "{0}/{1}".format(xlogid, xrecoff) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + node.stop() + node.cleanup() + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "--inclusive=false", + "-j", "4", '--lsn={0}'.format(target_lsn), + "--recovery-target-action=promote"] + ), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + + after = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + self.assertEqual( + len(node.execute("postgres", "SELECT * FROM tbl0005")), 1) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_full_ptrack_archive(self): + """recovery to latest from archive full+ptrack backups""" + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + ptrack_enable=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + node.pgbench_init(scale=2) + + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="ptrack") + + before = node.execute("postgres", "SELECT * FROM pgbench_branches") + + node.stop() + node.cleanup() + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=["-j", "4"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + after = node.execute("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_ptrack(self): + """recovery to latest from archive full+ptrack+ptrack backups""" + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + ptrack_enable=True) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + node.pgbench_init(scale=2) + + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', node, backup_type="ptrack") + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="ptrack") + + before = node.execute("postgres", "SELECT * FROM pgbench_branches") + + node.stop() + node.cleanup() + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=["-j", "4"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + after = node.execute("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_full_ptrack_stream(self): + """recovery in stream mode to latest from full + ptrack backups""" + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + node.pgbench_init(scale=2) + + self.backup_node(backup_dir, 'node', node, options=["--stream"]) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type="ptrack", options=["--stream"]) + + before = node.execute("postgres", "SELECT * FROM 
pgbench_branches") + + node.stop() + node.cleanup() + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + after = node.execute("postgres", "SELECT * FROM pgbench_branches") + self.assertEqual(before, after) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_full_ptrack_under_load(self): + """ + recovery to latest from full + ptrack backups + with loads when ptrack backup do + """ + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + node.pgbench_init(scale=2) + + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "8"] + ) + + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type="ptrack", options=["--stream"]) + + pgbench.wait() + pgbench.stdout.close() + + bbalance = node.execute( + "postgres", "SELECT sum(bbalance) FROM pgbench_branches") + delta = node.execute( + "postgres", "SELECT sum(delta) FROM pgbench_history") + + self.assertEqual(bbalance, delta) + node.stop() + node.cleanup() + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + bbalance = node.execute( + "postgres", "SELECT sum(bbalance) FROM pgbench_branches") + delta = node.execute( + "postgres", "SELECT sum(delta) FROM pgbench_history") + self.assertEqual(bbalance, delta) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_full_under_load_ptrack(self): + """ + recovery to latest from full + page backups + with loads when full backup do + """ + if not self.ptrack: + return unittest.skip('Skipped because ptrack support is disabled') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if node.major_version >= 12: + node.safe_psql( + "postgres", + "CREATE EXTENSION ptrack") + + # wal_segment_size = self.guc_wal_segment_size(node) + node.pgbench_init(scale=2) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "8"] + ) + + self.backup_node(backup_dir, 'node', node) + + pgbench.wait() + pgbench.stdout.close() + + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type="ptrack", 
options=["--stream"]) + + bbalance = node.execute( + "postgres", "SELECT sum(bbalance) FROM pgbench_branches") + delta = node.execute( + "postgres", "SELECT sum(delta) FROM pgbench_history") + + self.assertEqual(bbalance, delta) + + node.stop() + node.cleanup() + # self.wrong_wal_clean(node, wal_segment_size) + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + node.slow_start() + bbalance = node.execute( + "postgres", "SELECT sum(bbalance) FROM pgbench_branches") + delta = node.execute( + "postgres", "SELECT sum(delta) FROM pgbench_history") + self.assertEqual(bbalance, delta) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_with_tablespace_mapping_1(self): + """recovery using tablespace-mapping option""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Create tablespace + tblspc_path = os.path.join(node.base_dir, "tblspc") + os.makedirs(tblspc_path) + with node.connect("postgres") as con: + con.connection.autocommit = True + con.execute("CREATE TABLESPACE tblspc LOCATION '%s'" % tblspc_path) + con.connection.autocommit = False + con.execute("CREATE TABLE test (id int) TABLESPACE tblspc") + con.execute("INSERT INTO test VALUES (1)") + con.commit() + + backup_id = self.backup_node(backup_dir, 'node', node) + self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") + + # 1 - Try to restore to existing directory + node.stop() + try: + self.restore_node(backup_dir, 'node', node) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because restore destination is not empty.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Restore destination is not empty:', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # 2 - Try to restore to existing tablespace directory + tblspc_path_tmp = os.path.join(node.base_dir, "tblspc_tmp") + os.rename(tblspc_path, tblspc_path_tmp) + node.cleanup() + os.rename(tblspc_path_tmp, tblspc_path) + try: + self.restore_node(backup_dir, 'node', node) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because restore tablespace destination is " + "not empty.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: restore tablespace destination is not empty:', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # 3 - Restore using tablespace-mapping to not empty directory + tblspc_path_temp = os.path.join(node.base_dir, "tblspc_temp") + os.mkdir(tblspc_path_temp) + with open(os.path.join(tblspc_path_temp, 'file'), 'w+') as f: + f.close() + + try: + self.restore_node( + backup_dir, 'node', node, + options=["-T", "%s=%s" % (tblspc_path, tblspc_path_temp)]) + # we should die here because exception is what we 
expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because restore tablespace destination is " + "not empty.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: restore tablespace destination is not empty:', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # 4 - Restore using tablespace-mapping + tblspc_path_new = os.path.join(node.base_dir, "tblspc_new") + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-T", "%s=%s" % (tblspc_path, tblspc_path_new)] + ), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + + result = node.execute("postgres", "SELECT id FROM test") + self.assertEqual(result[0][0], 1) + + # 4 - Restore using tablespace-mapping using page backup + self.backup_node(backup_dir, 'node', node) + with node.connect("postgres") as con: + con.execute("INSERT INTO test VALUES (2)") + con.commit() + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="page") + + show_pb = self.show_pb(backup_dir, 'node') + self.assertEqual(show_pb[1]['status'], "OK") + self.assertEqual(show_pb[2]['status'], "OK") + + node.stop() + node.cleanup() + tblspc_path_page = os.path.join(node.base_dir, "tblspc_page") + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-T", "%s=%s" % (tblspc_path_new, tblspc_path_page)]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + result = node.execute("postgres", "SELECT id FROM test OFFSET 1") + self.assertEqual(result[0][0], 2) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_with_tablespace_mapping_2(self): + """recovery using tablespace-mapping option and page backup""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Full backup + self.backup_node(backup_dir, 'node', node) + self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") + + # Create tablespace + tblspc_path = os.path.join(node.base_dir, "tblspc") + os.makedirs(tblspc_path) + with node.connect("postgres") as con: + con.connection.autocommit = True + con.execute("CREATE TABLESPACE tblspc LOCATION '%s'" % tblspc_path) + con.connection.autocommit = False + con.execute( + "CREATE TABLE tbl AS SELECT * " + "FROM generate_series(0,3) AS integer") + con.commit() + + # First page backup + self.backup_node(backup_dir, 'node', node, backup_type="page") + self.assertEqual(self.show_pb(backup_dir, 'node')[1]['status'], "OK") + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['backup-mode'], "PAGE") + + # Create tablespace table + with node.connect("postgres") as con: +# con.connection.autocommit = True +# con.execute("CHECKPOINT") +# con.connection.autocommit = False + con.execute("CREATE TABLE tbl1 (a int) TABLESPACE tblspc") + con.execute( + "INSERT INTO tbl1 SELECT * " + "FROM generate_series(0,3) AS integer") + con.commit() + + # Second page 
backup + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="page") + self.assertEqual(self.show_pb(backup_dir, 'node')[2]['status'], "OK") + self.assertEqual( + self.show_pb(backup_dir, 'node')[2]['backup-mode'], "PAGE") + + node.stop() + node.cleanup() + + tblspc_path_new = os.path.join(node.base_dir, "tblspc_new") + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-T", "%s=%s" % (tblspc_path, tblspc_path_new)]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + node.slow_start() + + count = node.execute("postgres", "SELECT count(*) FROM tbl") + self.assertEqual(count[0][0], 4) + count = node.execute("postgres", "SELECT count(*) FROM tbl1") + self.assertEqual(count[0][0], 4) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_archive_node_backup_stream_restore_to_recovery_time(self): + """ + make node with archiving, make stream backup, + make PITR to Recovery Time + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node( + backup_dir, 'node', node, options=["--stream"]) + node.safe_psql("postgres", "create table t_heap(a int)") + + node.stop() + node.cleanup() + + recovery_time = self.show_pb( + backup_dir, 'node', backup_id)['recovery-time'] + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", '--time={0}'.format(recovery_time), + "--recovery-target-action=promote" + ] + ), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + + result = node.psql("postgres", 'select * from t_heap') + self.assertTrue('does not exist' in result[2].decode("utf-8")) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_archive_node_backup_stream_restore_to_recovery_time(self): + """ + make node with archiving, make stream backup, + make PITR to Recovery Time + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node( + backup_dir, 'node', node, options=["--stream"]) + node.safe_psql("postgres", "create table t_heap(a int)") + node.stop() + node.cleanup() + + recovery_time = self.show_pb( + backup_dir, 'node', backup_id)['recovery-time'] + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", '--time={0}'.format(recovery_time), + "--recovery-target-action=promote" + ] + ), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + result = node.psql("postgres", 'select * from 
t_heap') + self.assertTrue('does not exist' in result[2].decode("utf-8")) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_archive_node_backup_stream_pitr(self): + """ + make node with archiving, make stream backup, + create table t_heap, make pitr to Recovery Time, + check that t_heap do not exists + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node( + backup_dir, 'node', node, options=["--stream"]) + node.safe_psql("postgres", "create table t_heap(a int)") + node.cleanup() + + recovery_time = self.show_pb( + backup_dir, 'node', backup_id)['recovery-time'] + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", '--time={0}'.format(recovery_time), + "--recovery-target-action=promote" + ] + ), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + node.slow_start() + + result = node.psql("postgres", 'select * from t_heap') + self.assertEqual(True, 'does not exist' in result[2].decode("utf-8")) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_archive_node_backup_archive_pitr_2(self): + """ + make node with archiving, make archive backup, + create table t_heap, make pitr to Recovery Time, + check that t_heap do not exists + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node.safe_psql("postgres", "create table t_heap(a int)") + node.stop() + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + recovery_time = self.show_pb( + backup_dir, 'node', backup_id)['recovery-time'] + + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id), + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", '--time={0}'.format(recovery_time), + "--recovery-target-action=promote"] + ), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + self.set_auto_conf(node_restored, {'port': node_restored.port}) + + node_restored.slow_start() + + result = node_restored.psql("postgres", 'select * from t_heap') + self.assertTrue('does not exist' in result[2].decode("utf-8")) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_archive_restore_to_restore_point(self): + """ + make node with archiving, make archive backup, + 
create table t_heap, make pitr to Recovery Time, + check that t_heap do not exists + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap as select generate_series(0,10000)") + result = node.safe_psql( + "postgres", + "select * from t_heap") + node.safe_psql( + "postgres", "select pg_create_restore_point('savepoint')") + node.safe_psql( + "postgres", + "create table t_heap_1 as select generate_series(0,10000)") + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, + options=[ + "--recovery-target-name=savepoint", + "--recovery-target-action=promote"]) + + node.slow_start() + + result_new = node.safe_psql("postgres", "select * from t_heap") + res = node.psql("postgres", "select * from t_heap_1") + self.assertEqual( + res[0], 1, + "Table t_heap_1 should not exist in restored instance") + + self.assertEqual(result, result_new) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + # @unittest.expectedFailure + def test_zags_block_corrupt(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + conn = node.connect() + with node.connect("postgres") as conn: + + conn.execute( + "create table tbl(i int)") + conn.commit() + conn.execute( + "create index idx ON tbl (i)") + conn.commit() + conn.execute( + "insert into tbl select i from generate_series(0,400) as i") + conn.commit() + conn.execute( + "select pg_relation_size('idx')") + conn.commit() + conn.execute( + "delete from tbl where i < 100") + conn.commit() + conn.execute( + "explain analyze select i from tbl order by i") + conn.commit() + conn.execute( + "select i from tbl order by i") + conn.commit() + conn.execute( + "create extension pageinspect") + conn.commit() + print(conn.execute( + "select * from bt_page_stats('idx',1)")) + conn.commit() + conn.execute( + "insert into tbl select i from generate_series(0,100) as i") + conn.commit() + conn.execute( + "insert into tbl select i from generate_series(0,100) as i") + conn.commit() + conn.execute( + "insert into tbl select i from generate_series(0,100) as i") + conn.commit() + conn.execute( + "insert into tbl select i from generate_series(0,100) as i") + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored'), + initdb_params=['--data-checksums']) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored) + + self.set_auto_conf( + node_restored, + {'archive_mode': 'off', 'hot_standby': 'on', 'port': node_restored.port}) + + node_restored.slow_start() + + @unittest.skip("skip") + # @unittest.expectedFailure + def test_zags_block_corrupt_1(self): + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + 
initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off', + 'full_page_writes': 'on'} + ) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + node.safe_psql('postgres', 'create table tbl(i int)') + + node.safe_psql('postgres', 'create index idx ON tbl (i)') + + node.safe_psql( + 'postgres', + 'insert into tbl select i from generate_series(0,100000) as i') + + node.safe_psql( + 'postgres', + 'delete from tbl where i%2 = 0') + + node.safe_psql( + 'postgres', + 'explain analyze select i from tbl order by i') + + node.safe_psql( + 'postgres', + 'select i from tbl order by i') + + node.safe_psql( + 'postgres', + 'create extension pageinspect') + + node.safe_psql( + 'postgres', + 'insert into tbl select i from generate_series(0,100) as i') + + node.safe_psql( + 'postgres', + 'insert into tbl select i from generate_series(0,100) as i') + + node.safe_psql( + 'postgres', + 'insert into tbl select i from generate_series(0,100) as i') + + node.safe_psql( + 'postgres', + 'insert into tbl select i from generate_series(0,100) as i') + + self.switch_wal_segment(node) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored'), + initdb_params=['--data-checksums']) + + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored) + + self.set_auto_conf( + node_restored, + {'archive_mode': 'off', 'hot_standby': 'on', 'port': node_restored.port}) + + node_restored.slow_start() + + while True: + with open(node_restored.pg_log_file, 'r') as f: + if 'selected new timeline ID' in f.read(): + break + + # with open(node_restored.pg_log_file, 'r') as f: + # print(f.read()) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + + self.compare_pgdata(pgdata, pgdata_restored) + +# pg_xlogdump_path = self.get_bin_path('pg_xlogdump') + +# pg_xlogdump = self.run_binary( +# [ +# pg_xlogdump_path, '-b', +# os.path.join(backup_dir, 'wal', 'node', '000000010000000000000003'), +# ' | ', 'grep', 'Btree', '' +# ], async=False) + + if pg_xlogdump.returncode: + self.assertFalse( + True, + 'Failed to start pg_wal_dump: {0}'.format( + pg_receivexlog.communicate()[1])) + + # @unittest.skip("skip") + def test_restore_chain(self): + """ + make node, take full backup, take several + ERROR delta backups, take valid delta backup, + restore must be successfull + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node( + backup_dir, 'node', node) + + # Take DELTA + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # Take ERROR DELTA + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--archive-timeout=0s']) + except ProbackupException as e: + pass + + # Take ERROR DELTA + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--archive-timeout=0s']) + except ProbackupException as e: + pass + + # Take DELTA + self.backup_node( + 
backup_dir, 'node', node, backup_type='delta') + + # Take ERROR DELTA + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--archive-timeout=0s']) + except ProbackupException as e: + pass + + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[0]['status'], + 'Backup STATUS should be "OK"') + + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[1]['status'], + 'Backup STATUS should be "OK"') + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node')[2]['status'], + 'Backup STATUS should be "ERROR"') + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node')[3]['status'], + 'Backup STATUS should be "ERROR"') + + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[4]['status'], + 'Backup STATUS should be "OK"') + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node')[5]['status'], + 'Backup STATUS should be "ERROR"') + + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_chain_with_corrupted_backup(self): + """more complex test_restore_chain()""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node( + backup_dir, 'node', node) + + # Take DELTA + self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Take ERROR DELTA + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=['--archive-timeout=0s']) + except ProbackupException as e: + pass + + # Take 1 DELTA + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # Take ERROR DELTA + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--archive-timeout=0s']) + except ProbackupException as e: + pass + + # Take 2 DELTA + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # Take ERROR DELTA + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--archive-timeout=0s']) + except ProbackupException as e: + pass + + # Take 3 DELTA + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # Corrupted 4 DELTA + corrupt_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # ORPHAN 5 DELTA + restore_target_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # ORPHAN 6 DELTA + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # NEXT FULL BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='full') + + # Next Delta + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # do corrupt 6 DELTA backup + file = os.path.join( + backup_dir, 'backups', 'node', + corrupt_id, 'database', 'global', 'pg_control') + + file_new = os.path.join(backup_dir, 'pg_control') + os.rename(file, file_new) + + # RESTORE BACKUP + node.cleanup() + + try: + self.restore_node( + backup_dir, 'node', node, backup_id=restore_target_id) + self.assertEqual( + 1, 0, + "Expecting Error because restore backup is corrupted.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 
'ERROR: Backup {0} is orphan'.format(restore_target_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[0]['status'], + 'Backup STATUS should be "OK"') + + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[1]['status'], + 'Backup STATUS should be "OK"') + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node')[2]['status'], + 'Backup STATUS should be "ERROR"') + + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[3]['status'], + 'Backup STATUS should be "OK"') + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node')[4]['status'], + 'Backup STATUS should be "ERROR"') + + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[5]['status'], + 'Backup STATUS should be "OK"') + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node')[6]['status'], + 'Backup STATUS should be "ERROR"') + + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[7]['status'], + 'Backup STATUS should be "OK"') + + # corruption victim + self.assertEqual( + 'CORRUPT', + self.show_pb(backup_dir, 'node')[8]['status'], + 'Backup STATUS should be "CORRUPT"') + + # orphaned child + self.assertEqual( + 'ORPHAN', + self.show_pb(backup_dir, 'node')[9]['status'], + 'Backup STATUS should be "ORPHAN"') + + # orphaned child + self.assertEqual( + 'ORPHAN', + self.show_pb(backup_dir, 'node')[10]['status'], + 'Backup STATUS should be "ORPHAN"') + + # next FULL + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[11]['status'], + 'Backup STATUS should be "OK"') + + # next DELTA + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[12]['status'], + 'Backup STATUS should be "OK"') + + node.cleanup() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_backup_from_future(self): + """more complex test_restore_chain()""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node(backup_dir, 'node', node) + + node.pgbench_init(scale=5) + # pgbench = node.pgbench(options=['-T', '20', '-c', '2']) + # pgbench.wait() + + # Take PAGE from future + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + with open( + os.path.join( + backup_dir, 'backups', 'node', + backup_id, "backup.control"), "a") as conf: + conf.write("start-time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() + timedelta(days=3))) + + # rename directory + new_id = self.show_pb(backup_dir, 'node')[1]['id'] + + os.rename( + os.path.join(backup_dir, 'backups', 'node', backup_id), + os.path.join(backup_dir, 'backups', 'node', new_id)) + + pgbench = node.pgbench(options=['-T', '7', '-c', '1', '--no-vacuum']) + pgbench.wait() + + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + pgdata = self.pgdata_content(node.data_dir) + + node.cleanup() + self.restore_node(backup_dir, 'node', node, backup_id=backup_id) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, 
fname) + + # @unittest.skip("skip") + def test_restore_target_immediate_stream(self): + """ + correct handling of immediate recovery target + for STREAM backups + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # Take delta + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_conf = os.path.join(node.data_dir, 'probackup_recovery.conf') + else: + recovery_conf = os.path.join(node.data_dir, 'recovery.conf') + + # restore delta backup + node.cleanup() + self.restore_node( + backup_dir, 'node', node, options=['--immediate']) + + self.assertTrue( + os.path.isfile(recovery_conf), + "File {0} do not exists".format(recovery_conf)) + + # restore delta backup + node.cleanup() + self.restore_node( + backup_dir, 'node', node, options=['--recovery-target=immediate']) + + self.assertTrue( + os.path.isfile(recovery_conf), + "File {0} do not exists".format(recovery_conf)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_target_immediate_archive(self): + """ + correct handling of immediate recovery target + for ARCHIVE backups + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node( + backup_dir, 'node', node) + + # Take delta + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_conf = os.path.join(node.data_dir, 'probackup_recovery.conf') + else: + recovery_conf = os.path.join(node.data_dir, 'recovery.conf') + + # restore page backup + node.cleanup() + self.restore_node( + backup_dir, 'node', node, options=['--immediate']) + + # For archive backup with immediate recovery target + # recovery.conf is mandatory + with open(recovery_conf, 'r') as f: + self.assertIn("recovery_target = 'immediate'", f.read()) + + # restore page backup + node.cleanup() + self.restore_node( + backup_dir, 'node', node, options=['--recovery-target=immediate']) + + # For archive backup with immediate recovery target + # recovery.conf is mandatory + with open(recovery_conf, 'r') as f: + self.assertIn("recovery_target = 'immediate'", f.read()) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_target_latest_archive(self): + """ + make sure that recovery_target 'latest' + is default recovery target + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, 
module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node(backup_dir, 'node', node) + + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_conf = os.path.join(node.data_dir, 'probackup_recovery.conf') + else: + recovery_conf = os.path.join(node.data_dir, 'recovery.conf') + + # restore + node.cleanup() + self.restore_node(backup_dir, 'node', node) + + # hash_1 = hashlib.md5( + # open(recovery_conf, 'rb').read()).hexdigest() + + with open(recovery_conf, 'r') as f: + content_1 = f.read() + + # restore + node.cleanup() + + self.restore_node(backup_dir, 'node', node, options=['--recovery-target=latest']) + + # hash_2 = hashlib.md5( + # open(recovery_conf, 'rb').read()).hexdigest() + + with open(recovery_conf, 'r') as f: + content_2 = f.read() + + self.assertEqual(content_1, content_2) + + # self.assertEqual(hash_1, hash_2) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_target_new_options(self): + """ + check that new --recovery-target-* + options are working correctly + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node(backup_dir, 'node', node) + + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_conf = os.path.join(node.data_dir, 'probackup_recovery.conf') + else: + recovery_conf = os.path.join(node.data_dir, 'recovery.conf') + + node.pgbench_init(scale=2) + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + node.safe_psql( + "postgres", + "CREATE TABLE tbl0005 (a text)") + + node.safe_psql( + "postgres", "select pg_create_restore_point('savepoint')") + + target_name = 'savepoint' + + target_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + with node.connect("postgres") as con: + res = con.execute( + "INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") + con.commit() + target_xid = res[0][0] + + with node.connect("postgres") as con: + con.execute("INSERT INTO tbl0005 VALUES (1)") + con.commit() + if self.get_version(node) > self.version_to_num('10.0'): + res = con.execute("SELECT pg_current_wal_lsn()") + else: + res = con.execute("SELECT pg_current_xlog_location()") + + con.commit() + con.execute("INSERT INTO tbl0005 VALUES (2)") + con.commit() + xlogid, xrecoff = res[0][0].split('/') + xrecoff = hex(int(xrecoff, 16) + 1)[2:] + target_lsn = "{0}/{1}".format(xlogid, xrecoff) + + # Restore with recovery target time + node.cleanup() + self.restore_node( + backup_dir, 'node', node, + options=[ + '--recovery-target-time={0}'.format(target_time), + "--recovery-target-action=promote", + '--recovery-target-timeline=1', + ]) + + with open(recovery_conf, 'r') as f: + recovery_conf_content = f.read() + + self.assertIn( + "recovery_target_time = '{0}'".format(target_time), + recovery_conf_content) + + self.assertIn( + "recovery_target_action = 'promote'", + recovery_conf_content) + + self.assertIn( + "recovery_target_timeline = '1'", + recovery_conf_content) + + node.slow_start() + + # Restore with 
recovery target xid + node.cleanup() + self.restore_node( + backup_dir, 'node', node, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + "--recovery-target-action=promote", + '--recovery-target-timeline=1', + ]) + + with open(recovery_conf, 'r') as f: + recovery_conf_content = f.read() + + self.assertIn( + "recovery_target_xid = '{0}'".format(target_xid), + recovery_conf_content) + + self.assertIn( + "recovery_target_action = 'promote'", + recovery_conf_content) + + self.assertIn( + "recovery_target_timeline = '1'", + recovery_conf_content) + + node.slow_start() + + # Restore with recovery target name + node.cleanup() + self.restore_node( + backup_dir, 'node', node, + options=[ + '--recovery-target-name={0}'.format(target_name), + "--recovery-target-action=promote", + '--recovery-target-timeline=1', + ]) + + with open(recovery_conf, 'r') as f: + recovery_conf_content = f.read() + + self.assertIn( + "recovery_target_name = '{0}'".format(target_name), + recovery_conf_content) + + self.assertIn( + "recovery_target_action = 'promote'", + recovery_conf_content) + + self.assertIn( + "recovery_target_timeline = '1'", + recovery_conf_content) + + node.slow_start() + + # Restore with recovery target lsn + if self.get_version(node) >= 100000: + + node.cleanup() + self.restore_node( + backup_dir, 'node', node, + options=[ + '--recovery-target-lsn={0}'.format(target_lsn), + "--recovery-target-action=promote", + '--recovery-target-timeline=1', + ]) + + with open(recovery_conf, 'r') as f: + recovery_conf_content = f.read() + + self.assertIn( + "recovery_target_lsn = '{0}'".format(target_lsn), + recovery_conf_content) + + self.assertIn( + "recovery_target_action = 'promote'", + recovery_conf_content) + + self.assertIn( + "recovery_target_timeline = '1'", + recovery_conf_content) + + node.slow_start() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_smart_restore(self): + """ + make node, create database, take full backup, drop database, + take incremental backup and restore it, + make sure that files from dropped database are not + copied during restore + https://github.com/postgrespro/pg_probackup/issues/63 + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # create database + node.safe_psql( + "postgres", + "CREATE DATABASE testdb") + + # take FULL backup + full_id = self.backup_node(backup_dir, 'node', node) + + # drop database + node.safe_psql( + "postgres", + "DROP DATABASE testdb") + + # take PAGE backup + page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # restore PAGE backup + node.cleanup() + self.restore_node( + backup_dir, 'node', node, backup_id=page_id, + options=['--no-validate', '--log-level-file=VERBOSE']) + + logfile = os.path.join(backup_dir, 'log', 'pg_probackup.log') + with open(logfile, 'r') as f: + logfile_content = f.read() + + # get delta between FULL and PAGE filelists + filelist_full = self.get_backup_filelist( + backup_dir, 'node', full_id) + + filelist_page = self.get_backup_filelist( + backup_dir, 'node', page_id) + + filelist_diff = self.get_backup_filelist_diff( + filelist_full, filelist_page) + + for 
file in filelist_diff: + self.assertNotIn(file, logfile_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_pg_11_group_access(self): + """ + test group access for PG >= 11 + """ + if self.pg_config_version < self.version_to_num('11.0'): + return unittest.skip('You need PostgreSQL >= 11 for this test') + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=[ + '--data-checksums', + '--allow-group-access']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # take FULL backup + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + # restore backup + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored) + + # compare pgdata permissions + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_concurrent_drop_table(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=1) + + # FULL backup + self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--compress']) + + # DELTA backup + gdb = self.backup_node( + backup_dir, 'node', node, backup_type='delta', + options=['--stream', '--compress', '--no-validate'], + gdb=True) + + gdb.set_breakpoint('backup_data_file') + gdb.run_until_break() + + node.safe_psql( + 'postgres', + 'DROP TABLE pgbench_accounts') + + # do checkpoint to guarantee filenode removal + node.safe_psql( + 'postgres', + 'CHECKPOINT') + + gdb.remove_all_breakpoints() + gdb.continue_execution_until_exit() + + pgdata = self.pgdata_content(node.data_dir) + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, options=['--no-validate']) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_lost_non_data_file(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + file = os.path.join( + backup_dir, 'backups', 'node', + backup_id, 'database', 'postgresql.auto.conf') + + os.remove(file) + + node.cleanup() + + try: + self.restore_node( + backup_dir, 'node', node, options=['--no-validate']) + self.assertEqual( + 1, 0, + "Expecting Error because of non-data file dissapearance.\n " + 
"Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'No such file or directory', e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'ERROR: Backup files restoring failed', e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_partial_restore_exclude(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + db_list_raw = node.safe_psql( + 'postgres', + 'SELECT to_json(a) ' + 'FROM (SELECT oid, datname FROM pg_database) a').rstrip() + + db_list_splitted = db_list_raw.splitlines() + + db_list = {} + for line in db_list_splitted: + line = json.loads(line) + db_list[line['datname']] = line['oid'] + + # FULL backup + backup_id = self.backup_node(backup_dir, 'node', node) + pgdata = self.pgdata_content(node.data_dir) + + # restore FULL backup + node_restored_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored_1')) + node_restored_1.cleanup() + + try: + self.restore_node( + backup_dir, 'node', + node_restored_1, options=[ + "--db-include=db1", + "--db-exclude=db2"]) + self.assertEqual( + 1, 0, + "Expecting Error because of 'db-exclude' and 'db-include'.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: You cannot specify '--db-include' " + "and '--db-exclude' together", e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.restore_node( + backup_dir, 'node', node_restored_1) + + pgdata_restored_1 = self.pgdata_content(node_restored_1.data_dir) + self.compare_pgdata(pgdata, pgdata_restored_1) + + db1_path = os.path.join( + node_restored_1.data_dir, 'base', db_list['db1']) + db5_path = os.path.join( + node_restored_1.data_dir, 'base', db_list['db5']) + + self.truncate_every_file_in_dir(db1_path) + self.truncate_every_file_in_dir(db5_path) + pgdata_restored_1 = self.pgdata_content(node_restored_1.data_dir) + + node_restored_2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored_2')) + node_restored_2.cleanup() + + self.restore_node( + backup_dir, 'node', + node_restored_2, options=[ + "--db-exclude=db1", + "--db-exclude=db5"]) + + pgdata_restored_2 = self.pgdata_content(node_restored_2.data_dir) + self.compare_pgdata(pgdata_restored_1, pgdata_restored_2) + + self.set_auto_conf(node_restored_2, {'port': node_restored_2.port}) + + node_restored_2.slow_start() + + node_restored_2.safe_psql( + 'postgres', + 'select 1') + + try: + node_restored_2.safe_psql( + 'db1', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + try: + node_restored_2.safe_psql( + 'db5', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + with open(node_restored_2.pg_log_file, 'r') as f: + output = f.read() + + self.assertNotIn('PANIC', output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + 
+ def test_partial_restore_exclude_tablespace(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + cat_version = node.get_control_data()["Catalog version number"] + version_specific_dir = 'PG_' + node.major_version_str + '_' + cat_version + + # PG_10_201707211 + # pg_tblspc/33172/PG_9.5_201510051/16386/ + + self.create_tblspace_in_node(node, 'somedata') + + node_tablespace = self.get_tblspace_path(node, 'somedata') + + tbl_oid = node.safe_psql( + 'postgres', + "SELECT oid " + "FROM pg_tablespace " + "WHERE spcname = 'somedata'").rstrip() + + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0} tablespace somedata'.format(i)) + + db_list_raw = node.safe_psql( + 'postgres', + 'SELECT to_json(a) ' + 'FROM (SELECT oid, datname FROM pg_database) a').rstrip() + + db_list_splitted = db_list_raw.splitlines() + + db_list = {} + for line in db_list_splitted: + line = json.loads(line) + db_list[line['datname']] = line['oid'] + + # FULL backup + backup_id = self.backup_node(backup_dir, 'node', node) + pgdata = self.pgdata_content(node.data_dir) + + # restore FULL backup + node_restored_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored_1')) + node_restored_1.cleanup() + + node1_tablespace = self.get_tblspace_path(node_restored_1, 'somedata') + + self.restore_node( + backup_dir, 'node', + node_restored_1, options=[ + "-T", "{0}={1}".format( + node_tablespace, node1_tablespace)]) + + pgdata_restored_1 = self.pgdata_content(node_restored_1.data_dir) + self.compare_pgdata(pgdata, pgdata_restored_1) + + # truncate every db + for db in db_list: + # with exception below + if db in ['db1', 'db5']: + self.truncate_every_file_in_dir( + os.path.join( + node_restored_1.data_dir, 'pg_tblspc', + tbl_oid, version_specific_dir, db_list[db])) + + pgdata_restored_1 = self.pgdata_content(node_restored_1.data_dir) + + node_restored_2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored_2')) + node_restored_2.cleanup() + node2_tablespace = self.get_tblspace_path(node_restored_2, 'somedata') + + self.restore_node( + backup_dir, 'node', + node_restored_2, options=[ + "--db-exclude=db1", + "--db-exclude=db5", + "-T", "{0}={1}".format( + node_tablespace, node2_tablespace)]) + + pgdata_restored_2 = self.pgdata_content(node_restored_2.data_dir) + self.compare_pgdata(pgdata_restored_1, pgdata_restored_2) + + self.set_auto_conf(node_restored_2, {'port': node_restored_2.port}) + + node_restored_2.slow_start() + + node_restored_2.safe_psql( + 'postgres', + 'select 1') + + try: + node_restored_2.safe_psql( + 'db1', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + try: + node_restored_2.safe_psql( + 'db5', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + with open(node_restored_2.pg_log_file, 'r') as f: + output = f.read() + + self.assertNotIn('PANIC', output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_partial_restore_include(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + 
base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + db_list_raw = node.safe_psql( + 'postgres', + 'SELECT to_json(a) ' + 'FROM (SELECT oid, datname FROM pg_database) a').rstrip() + + db_list_splitted = db_list_raw.splitlines() + + db_list = {} + for line in db_list_splitted: + line = json.loads(line) + db_list[line['datname']] = line['oid'] + + # FULL backup + backup_id = self.backup_node(backup_dir, 'node', node) + pgdata = self.pgdata_content(node.data_dir) + + # restore FULL backup + node_restored_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored_1')) + node_restored_1.cleanup() + + try: + self.restore_node( + backup_dir, 'node', + node_restored_1, options=[ + "--db-include=db1", + "--db-exclude=db2"]) + self.assertEqual( + 1, 0, + "Expecting Error because of 'db-exclude' and 'db-include'.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: You cannot specify '--db-include' " + "and '--db-exclude' together", e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.restore_node( + backup_dir, 'node', node_restored_1) + + pgdata_restored_1 = self.pgdata_content(node_restored_1.data_dir) + self.compare_pgdata(pgdata, pgdata_restored_1) + + # truncate every db + for db in db_list: + # with exception below + if db in ['template0', 'template1', 'postgres', 'db1', 'db5']: + continue + self.truncate_every_file_in_dir( + os.path.join( + node_restored_1.data_dir, 'base', db_list[db])) + + pgdata_restored_1 = self.pgdata_content(node_restored_1.data_dir) + + node_restored_2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored_2')) + node_restored_2.cleanup() + + self.restore_node( + backup_dir, 'node', + node_restored_2, options=[ + "--db-include=db1", + "--db-include=db5", + "--db-include=postgres"]) + + pgdata_restored_2 = self.pgdata_content(node_restored_2.data_dir) + self.compare_pgdata(pgdata_restored_1, pgdata_restored_2) + + self.set_auto_conf(node_restored_2, {'port': node_restored_2.port}) + node_restored_2.slow_start() + + node_restored_2.safe_psql( + 'db1', + 'select 1') + + node_restored_2.safe_psql( + 'db5', + 'select 1') + + node_restored_2.safe_psql( + 'template1', + 'select 1') + + try: + node_restored_2.safe_psql( + 'db2', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + try: + node_restored_2.safe_psql( + 'db10', + 'select 1') + except QueryException as e: + self.assertIn('FATAL', e.message) + + with open(node_restored_2.pg_log_file, 'r') as f: + output = f.read() + + self.assertNotIn('PANIC', output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_partial_restore_backward_compatibility_1(self): + """ + old binary should be of version < 2.2.0 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + + node.slow_start() + + # create databases 
+ for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + # FULL backup with old binary, without partial restore support + backup_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + try: + self.restore_node( + backup_dir, 'node', + node_restored, options=[ + "--db-exclude=db5"]) + self.assertEqual( + 1, 0, + "Expecting Error because backup do not support partial restore.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Backup {0} doesn't contain a database_map, " + "partial restore is impossible".format(backup_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.restore_node(backup_dir, 'node', node_restored) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # incremental backup with partial restore support + for i in range(11, 15, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + # get db list + db_list_raw = node.safe_psql( + 'postgres', + 'SELECT to_json(a) ' + 'FROM (SELECT oid, datname FROM pg_database) a').rstrip() + db_list_splitted = db_list_raw.splitlines() + db_list = {} + for line in db_list_splitted: + line = json.loads(line) + db_list[line['datname']] = line['oid'] + + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--stream']) + + # get etalon + node_restored.cleanup() + self.restore_node(backup_dir, 'node', node_restored) + self.truncate_every_file_in_dir( + os.path.join( + node_restored.data_dir, 'base', db_list['db5'])) + self.truncate_every_file_in_dir( + os.path.join( + node_restored.data_dir, 'base', db_list['db14'])) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + + # get new node + node_restored_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored_1')) + node_restored_1.cleanup() + + self.restore_node( + backup_dir, 'node', + node_restored_1, options=[ + "--db-exclude=db5", + "--db-exclude=db14"]) + + pgdata_restored_1 = self.pgdata_content(node_restored_1.data_dir) + + self.compare_pgdata(pgdata_restored, pgdata_restored_1) + + def test_partial_restore_backward_compatibility_merge(self): + """ + old binary should be of version < 2.2.0 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + + node.slow_start() + + # create databases + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + # FULL backup with old binary, without partial restore support + backup_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=['--stream']) + + pgdata = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + try: + self.restore_node( + backup_dir, 'node', + node_restored, options=[ + "--db-exclude=db5"]) + 
self.assertEqual( + 1, 0, + "Expecting Error because backup do not support partial restore.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Backup {0} doesn't contain a database_map, " + "partial restore is impossible.".format(backup_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.restore_node(backup_dir, 'node', node_restored) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # incremental backup with partial restore support + for i in range(11, 15, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + # get db list + db_list_raw = node.safe_psql( + 'postgres', + 'SELECT to_json(a) ' + 'FROM (SELECT oid, datname FROM pg_database) a').rstrip() + db_list_splitted = db_list_raw.splitlines() + db_list = {} + for line in db_list_splitted: + line = json.loads(line) + db_list[line['datname']] = line['oid'] + + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--stream']) + + # get etalon + node_restored.cleanup() + self.restore_node(backup_dir, 'node', node_restored) + self.truncate_every_file_in_dir( + os.path.join( + node_restored.data_dir, 'base', db_list['db5'])) + self.truncate_every_file_in_dir( + os.path.join( + node_restored.data_dir, 'base', db_list['db14'])) + pgdata_restored = self.pgdata_content(node_restored.data_dir) + + # get new node + node_restored_1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored_1')) + node_restored_1.cleanup() + + # merge + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + self.restore_node( + backup_dir, 'node', + node_restored_1, options=[ + "--db-exclude=db5", + "--db-exclude=db14"]) + pgdata_restored_1 = self.pgdata_content(node_restored_1.data_dir) + + self.compare_pgdata(pgdata_restored, pgdata_restored_1) + + def test_empty_and_mangled_database_map(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + + node.slow_start() + + # create databases + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + # FULL backup with database_map + backup_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + pgdata = self.pgdata_content(node.data_dir) + + # truncate database_map + path = os.path.join( + backup_dir, 'backups', 'node', + backup_id, 'database', 'database_map') + with open(path, "w") as f: + f.close() + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + try: + self.restore_node( + backup_dir, 'node', node_restored, + options=["--db-include=db1", '--no-validate']) + self.assertEqual( + 1, 0, + "Expecting Error because database_map is empty.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Backup {0} has empty or mangled database_map, " + "partial restore is impossible".format(backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + try: + self.restore_node( + backup_dir, 
'node', node_restored, + options=["--db-exclude=db1", '--no-validate']) + self.assertEqual( + 1, 0, + "Expecting Error because database_map is empty.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Backup {0} has empty or mangled database_map, " + "partial restore is impossible".format(backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # mangle database_map + with open(path, "w") as f: + f.write("42") + f.close() + + try: + self.restore_node( + backup_dir, 'node', node_restored, + options=["--db-include=db1", '--no-validate']) + self.assertEqual( + 1, 0, + "Expecting Error because database_map is empty.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: field "dbOid" is not found in the line 42 of ' + 'the file backup_content.control', e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + try: + self.restore_node( + backup_dir, 'node', node_restored, + options=["--db-exclude=db1", '--no-validate']) + self.assertEqual( + 1, 0, + "Expecting Error because database_map is empty.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: field "dbOid" is not found in the line 42 of ' + 'the file backup_content.control', e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # check that simple restore is still possible + self.restore_node( + backup_dir, 'node', node_restored, options=['--no-validate']) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + def test_missing_database_map(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + ptrack_enable=self.ptrack, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + + node.slow_start() + + # create databases + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + node.safe_psql( + "postgres", + "CREATE DATABASE backupdb") + + # PG 9.5 + if self.get_version(node) < 90600: + node.safe_psql( + 'backupdb', + "REVOKE ALL ON DATABASE backupdb from PUBLIC; " + "REVOKE ALL ON SCHEMA public from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON SCHEMA information_schema from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; " + "CREATE ROLE backup WITH LOGIN REPLICATION; " + "GRANT CONNECT ON DATABASE backupdb to backup; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_proc TO 
backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; " # for partial restore, checkdb and ptrack + "GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.textout(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.timestamptz(timestamp with time zone, integer) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;" + ) + # PG 9.6 + elif self.get_version(node) > 90600 and self.get_version(node) < 100000: + node.safe_psql( + 'backupdb', + "REVOKE ALL ON DATABASE backupdb from PUBLIC; " + "REVOKE ALL ON SCHEMA public from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON SCHEMA information_schema from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; " + "CREATE ROLE backup WITH LOGIN REPLICATION; " + "GRANT CONNECT ON DATABASE backupdb to backup; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; " # for partial restore, checkdb and ptrack + "GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.textout(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.timestamptz(timestamp with time zone, integer) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_system() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_xlog() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_xlog_replay_location() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;" + ) + # >= 10 + else: + node.safe_psql( + 'backupdb', + "REVOKE ALL ON DATABASE backupdb from PUBLIC; " + "REVOKE ALL ON SCHEMA public from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; " + "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; " + "REVOKE ALL ON 
ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; " + "REVOKE ALL ON SCHEMA information_schema from PUBLIC; " + "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; " + "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; " + "CREATE ROLE backup WITH LOGIN REPLICATION; " + "GRANT CONNECT ON DATABASE backupdb to backup; " + "GRANT USAGE ON SCHEMA pg_catalog TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; " + "GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; " # for partial restore, checkdb and ptrack + "GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.current_setting(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_system() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_start_backup(text, boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_stop_backup(boolean, boolean) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_create_restore_point(text) TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_switch_wal() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.pg_last_wal_replay_lsn() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_current_snapshot() TO backup; " + "GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;" + ) + + if self.ptrack: + fnames = [] + if node.major_version < 12: + fnames += [ + 'pg_catalog.oideq(oid, oid)', + 'pg_catalog.ptrack_version()', + 'pg_catalog.pg_ptrack_clear()', + 'pg_catalog.pg_ptrack_control_lsn()', + 'pg_catalog.pg_ptrack_get_and_clear_db(oid, oid)', + 'pg_catalog.pg_ptrack_get_and_clear(oid, oid)', + 'pg_catalog.pg_ptrack_get_block_2(oid, oid, oid, bigint)' + ] + else: + # TODO why backup works without these grants ? 
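# (Editor's assumption regarding the TODO above: EXECUTE on newly created
#  functions is granted to PUBLIC by default, so unless the ptrack 2.x
#  extension script revokes it, the backup role can call these functions
#  without explicit grants. Not verified against the extension script.)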
+# fnames += [ +# 'pg_ptrack_get_pagemapset(pg_lsn)', +# 'pg_ptrack_control_lsn()', +# 'pg_ptrack_get_block(oid, oid, oid, bigint)' +# ] + node.safe_psql( + "backupdb", + "CREATE EXTENSION ptrack WITH SCHEMA pg_catalog") + + for fname in fnames: + node.safe_psql( + "backupdb", + "GRANT EXECUTE ON FUNCTION {0} TO backup".format(fname)) + + if ProbackupTest.enterprise: + node.safe_psql( + "backupdb", + "GRANT EXECUTE ON FUNCTION pg_catalog.pgpro_edition() TO backup") + + node.safe_psql( + "backupdb", + "GRANT EXECUTE ON FUNCTION pg_catalog.pgpro_version() TO backup") + + # FULL backup without database_map + backup_id = self.backup_node( + backup_dir, 'node', node, datname='backupdb', + options=['--stream', "-U", "backup", '--log-level-file=verbose']) + + pgdata = self.pgdata_content(node.data_dir) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + # backup has missing database_map and that is legal + try: + self.restore_node( + backup_dir, 'node', node_restored, + options=["--db-exclude=db5", "--db-exclude=db9"]) + self.assertEqual( + 1, 0, + "Expecting Error because user do not have pg_database access.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Backup {0} doesn't contain a database_map, " + "partial restore is impossible.".format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + try: + self.restore_node( + backup_dir, 'node', node_restored, + options=["--db-include=db1"]) + self.assertEqual( + 1, 0, + "Expecting Error because user do not have pg_database access.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Backup {0} doesn't contain a database_map, " + "partial restore is impossible.".format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # check that simple restore is still possible + self.restore_node(backup_dir, 'node', node_restored) + + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_stream_restore_command_option(self): + """ + correct handling of restore command options + when restoring STREAM backup + + 1. Restore STREAM backup with --restore-command only + parameter, check that PostgreSQL recovery uses + restore_command to obtain WAL from archive. + + 2. Restore STREAM backup wuth --restore-command + as replica, check that PostgreSQL recovery uses + restore_command to obtain WAL from archive. 
+ """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'max_wal_size': '32MB'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_conf = os.path.join(node.data_dir, 'probackup_recovery.conf') + else: + recovery_conf = os.path.join(node.data_dir, 'recovery.conf') + + # Take FULL + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + node.pgbench_init(scale=5) + + node.safe_psql( + 'postgres', + 'create table t1()') + + # restore backup + node.cleanup() + shutil.rmtree(os.path.join(node.logs_dir)) + + restore_cmd = self.get_restore_command(backup_dir, 'node', node) + + self.restore_node( + backup_dir, 'node', node, + options=[ + '--restore-command={0}'.format(restore_cmd)]) + + self.assertTrue( + os.path.isfile(recovery_conf), + "File '{0}' do not exists".format(recovery_conf)) + + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_signal = os.path.join(node.data_dir, 'recovery.signal') + self.assertTrue( + os.path.isfile(recovery_signal), + "File '{0}' do not exists".format(recovery_signal)) + + node.slow_start() + + node.safe_psql( + 'postgres', + 'select * from t1') + + timeline_id = node.safe_psql( + 'postgres', + 'select timeline_id from pg_control_checkpoint()').rstrip() + + self.assertEqual('2', timeline_id) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_primary_conninfo(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + node.pgbench_init(scale=1) + + #primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass' + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + str_conninfo='host=192.168.1.50 port=5432 user=foo password=foopass' + + self.restore_node( + backup_dir, 'node', replica, + options=['-R', '--primary-conninfo={0}'.format(str_conninfo)]) + + if self.get_version(node) >= self.version_to_num('12.0'): + standby_signal = os.path.join(replica.data_dir, 'standby.signal') + self.assertTrue( + os.path.isfile(standby_signal), + "File '{0}' do not exists".format(standby_signal)) + + if self.get_version(node) >= self.version_to_num('12.0'): + recovery_conf = os.path.join(replica.data_dir, 'probackup_recovery.conf') + else: + recovery_conf = os.path.join(replica.data_dir, 'recovery.conf') + + with open(os.path.join(replica.data_dir, recovery_conf), 'r') as f: + recovery_conf_content = f.read() + + self.assertIn(str_conninfo, recovery_conf_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_primary_slot_info(self): + """ + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + 
set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node(backup_dir, 'node', node, options=['--stream']) + + node.pgbench_init(scale=1) + + replica = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'replica')) + replica.cleanup() + + node.safe_psql( + "SELECT pg_create_physical_replication_slot('master_slot')") + + self.restore_node( + backup_dir, 'node', replica, + options=['-R', '--primary-slot-name=master_slot']) + + self.set_auto_conf(replica, {'port': replica.port}) + self.set_auto_conf(replica, {'hot_standby': 'on'}) + + if self.get_version(node) >= self.version_to_num('12.0'): + standby_signal = os.path.join(replica.data_dir, 'standby.signal') + self.assertTrue( + os.path.isfile(standby_signal), + "File '{0}' do not exists".format(standby_signal)) + + replica.slow_start(replica=True) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/restore_test.py b/tests/restore_test.py deleted file mode 100644 index c33a1e299..000000000 --- a/tests/restore_test.py +++ /dev/null @@ -1,1243 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, ProbackupException -import subprocess -from datetime import datetime -import sys -import time - - -module_name = 'restore' - - -class RestoreTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_restore_full_to_latest(self): - """recovery to latest from full backup""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - before = node.execute("postgres", "SELECT * FROM pgbench_branches") - backup_id = self.backup_node(backup_dir, 'node', node) - - node.stop() - node.cleanup() - - # 1 - Test recovery from latest - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=["-j", "4", "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - # 2 - Test that recovery.conf was created - recovery_conf = os.path.join(node.data_dir, "recovery.conf") - self.assertEqual(os.path.isfile(recovery_conf), True) - - node.slow_start() - - after = node.execute("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_full_page_to_latest(self): - """recovery to latest from full + page backups""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - 
self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - - self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - backup_id = self.backup_node( - backup_dir, 'node', node, backup_type="page") - - before = node.execute("postgres", "SELECT * FROM pgbench_branches") - - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=["-j", "4", "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - - after = node.execute("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_to_specific_timeline(self): - """recovery to target timeline""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - - before = node.execute("postgres", "SELECT * FROM pgbench_branches") - - backup_id = self.backup_node(backup_dir, 'node', node) - - target_tli = int( - node.get_control_data()["Latest checkpoint's TimeLineID"]) - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=["-j", "4", "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - options=['-T', '10', '-c', '2', '--no-vacuum']) - pgbench.wait() - pgbench.stdout.close() - - self.backup_node(backup_dir, 'node', node) - - node.stop() - node.cleanup() - - # Correct Backup must be choosen for restore - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", "--timeline={0}".format(target_tli), - "--recovery-target-action=promote"] - ), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - recovery_target_timeline = self.get_recovery_conf( - node)["recovery_target_timeline"] - self.assertEqual(int(recovery_target_timeline), target_tli) - - node.slow_start() - after = node.execute("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_to_time(self): - """recovery to target time""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.append_conf("postgresql.auto.conf", "TimeZone = Europe/Moscow") - node.start() 
- - node.pgbench_init(scale=2) - before = node.execute("postgres", "SELECT * FROM pgbench_branches") - - backup_id = self.backup_node(backup_dir, 'node', node) - - target_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", '--time={0}'.format(target_time), - "--recovery-target-action=promote" - ] - ), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - after = node.execute("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_to_xid_inclusive(self): - """recovery to target xid""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - with node.connect("postgres") as con: - con.execute("CREATE TABLE tbl0005 (a text)") - con.commit() - - backup_id = self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - before = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") - with node.connect("postgres") as con: - res = con.execute("INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") - con.commit() - target_xid = res[0][0] - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", '--xid={0}'.format(target_xid), - "--recovery-target-action=promote"] - ), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - after = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - self.assertEqual( - len(node.execute("postgres", "SELECT * FROM tbl0005")), 1) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_to_xid_not_inclusive(self): - """recovery with target inclusive false""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'ptrack_enable': 'on', - 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - with node.connect("postgres") as con: - con.execute("CREATE TABLE tbl0005 (a text)") - con.commit() - - backup_id = self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - 
pgbench.wait() - pgbench.stdout.close() - - before = node.execute("postgres", "SELECT * FROM pgbench_branches") - with node.connect("postgres") as con: - result = con.execute("INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") - con.commit() - target_xid = result[0][0] - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", - '--xid={0}'.format(target_xid), - "--inclusive=false", - "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - after = node.execute("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - self.assertEqual( - len(node.execute("postgres", "SELECT * FROM tbl0005")), 0) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_to_lsn_inclusive(self): - """recovery to target lsn""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - - if self.get_version(node) < self.version_to_num('10.0'): - self.del_test_dir(module_name, fname) - return - - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - with node.connect("postgres") as con: - con.execute("CREATE TABLE tbl0005 (a int)") - con.commit() - - backup_id = self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - before = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") - with node.connect("postgres") as con: - con.execute("INSERT INTO tbl0005 VALUES (1)") - con.commit() - res = con.execute("SELECT pg_current_wal_lsn()") - con.commit() - con.execute("INSERT INTO tbl0005 VALUES (2)") - con.commit() - xlogid, xrecoff = res[0][0].split('/') - xrecoff = hex(int(xrecoff, 16) + 1)[2:] - target_lsn = "{0}/{1}".format(xlogid, xrecoff) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", '--lsn={0}'.format(target_lsn), - "--recovery-target-action=promote"] - ), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - - after = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - self.assertEqual( - len(node.execute("postgres", "SELECT * FROM tbl0005")), 2) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_to_lsn_not_inclusive(self): - """recovery to target lsn""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - - if self.get_version(node) < self.version_to_num('10.0'): - 
self.del_test_dir(module_name, fname) - return - - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - with node.connect("postgres") as con: - con.execute("CREATE TABLE tbl0005 (a int)") - con.commit() - - backup_id = self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - before = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") - with node.connect("postgres") as con: - con.execute("INSERT INTO tbl0005 VALUES (1)") - con.commit() - res = con.execute("SELECT pg_current_wal_lsn()") - con.commit() - con.execute("INSERT INTO tbl0005 VALUES (2)") - con.commit() - xlogid, xrecoff = res[0][0].split('/') - xrecoff = hex(int(xrecoff, 16) + 1)[2:] - target_lsn = "{0}/{1}".format(xlogid, xrecoff) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "--inclusive=false", - "-j", "4", '--lsn={0}'.format(target_lsn), - "--recovery-target-action=promote"] - ), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - - after = node.safe_psql("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - self.assertEqual( - len(node.execute("postgres", "SELECT * FROM tbl0005")), 1) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_full_ptrack_archive(self): - """recovery to latest from archive full+ptrack backups""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'ptrack_enable': 'on'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - - self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - backup_id = self.backup_node( - backup_dir, 'node', node, backup_type="ptrack") - - before = node.execute("postgres", "SELECT * FROM pgbench_branches") - - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - after = node.execute("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_ptrack(self): - """recovery to latest from archive full+ptrack+ptrack backups""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'ptrack_enable': 
'on'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - - self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - self.backup_node(backup_dir, 'node', node, backup_type="ptrack") - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - backup_id = self.backup_node( - backup_dir, 'node', node, backup_type="ptrack") - - before = node.execute("postgres", "SELECT * FROM pgbench_branches") - - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - after = node.execute("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_full_ptrack_stream(self): - """recovery in stream mode to latest from full + ptrack backups""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'ptrack_enable': 'on', - 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - - self.backup_node(backup_dir, 'node', node, options=["--stream"]) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - backup_id = self.backup_node( - backup_dir, 'node', node, - backup_type="ptrack", options=["--stream"]) - - before = node.execute("postgres", "SELECT * FROM pgbench_branches") - - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=["-j", "4", "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - after = node.execute("postgres", "SELECT * FROM pgbench_branches") - self.assertEqual(before, after) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_full_ptrack_under_load(self): - """ - recovery to latest from full + ptrack backups - with loads when ptrack backup do - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'ptrack_enable': 'on', - 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - - self.backup_node(backup_dir, 'node', node) - - pgbench 
= node.pgbench( - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - options=["-c", "4", "-T", "8"] - ) - - backup_id = self.backup_node( - backup_dir, 'node', node, - backup_type="ptrack", options=["--stream"]) - - pgbench.wait() - pgbench.stdout.close() - - bbalance = node.execute( - "postgres", "SELECT sum(bbalance) FROM pgbench_branches") - delta = node.execute( - "postgres", "SELECT sum(delta) FROM pgbench_history") - - self.assertEqual(bbalance, delta) - node.stop() - node.cleanup() - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=["-j", "4", "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - bbalance = node.execute( - "postgres", "SELECT sum(bbalance) FROM pgbench_branches") - delta = node.execute( - "postgres", "SELECT sum(delta) FROM pgbench_history") - self.assertEqual(bbalance, delta) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_full_under_load_ptrack(self): - """ - recovery to latest from full + page backups - with loads when full backup do - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'ptrack_enable': 'on', - 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - # wal_segment_size = self.guc_wal_segment_size(node) - node.pgbench_init(scale=2) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - options=["-c", "4", "-T", "8"] - ) - - self.backup_node(backup_dir, 'node', node) - - pgbench.wait() - pgbench.stdout.close() - - backup_id = self.backup_node( - backup_dir, 'node', node, - backup_type="ptrack", options=["--stream"]) - - bbalance = node.execute( - "postgres", "SELECT sum(bbalance) FROM pgbench_branches") - delta = node.execute( - "postgres", "SELECT sum(delta) FROM pgbench_history") - - self.assertEqual(bbalance, delta) - - node.stop() - node.cleanup() - # self.wrong_wal_clean(node, wal_segment_size) - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=["-j", "4", "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - node.slow_start() - bbalance = node.execute( - "postgres", "SELECT sum(bbalance) FROM pgbench_branches") - delta = node.execute( - "postgres", "SELECT sum(delta) FROM pgbench_history") - self.assertEqual(bbalance, delta) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_with_tablespace_mapping_1(self): - """recovery using tablespace-mapping option""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica', - 'ptrack_enable': 'on', - 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', 
node) - node.start() - - # Create tablespace - tblspc_path = os.path.join(node.base_dir, "tblspc") - os.makedirs(tblspc_path) - with node.connect("postgres") as con: - con.connection.autocommit = True - con.execute("CREATE TABLESPACE tblspc LOCATION '%s'" % tblspc_path) - con.connection.autocommit = False - con.execute("CREATE TABLE test (id int) TABLESPACE tblspc") - con.execute("INSERT INTO test VALUES (1)") - con.commit() - - backup_id = self.backup_node(backup_dir, 'node', node) - self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") - - # 1 - Try to restore to existing directory - node.stop() - try: - self.restore_node(backup_dir, 'node', node) - # we should die here because exception is what we expect to happen - self.assertEqual( - 1, 0, - "Expecting Error because restore destionation is not empty.\n " - "Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertEqual( - e.message, - 'ERROR: restore destination is not empty: "{0}"\n'.format( - node.data_dir), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - # 2 - Try to restore to existing tablespace directory - node.cleanup() - try: - self.restore_node(backup_dir, 'node', node) - # we should die here because exception is what we expect to happen - self.assertEqual( - 1, 0, - "Expecting Error because restore tablespace destination is " - "not empty.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertEqual( - e.message, - 'ERROR: restore tablespace destination ' - 'is not empty: "{0}"\n'.format(tblspc_path), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - # 3 - Restore using tablespace-mapping - tblspc_path_new = os.path.join(node.base_dir, "tblspc_new") - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-T", "%s=%s" % (tblspc_path, tblspc_path_new), - "--recovery-target-action=promote"] - ), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - - result = node.execute("postgres", "SELECT id FROM test") - self.assertEqual(result[0][0], 1) - - # 4 - Restore using tablespace-mapping using page backup - self.backup_node(backup_dir, 'node', node) - with node.connect("postgres") as con: - con.execute("INSERT INTO test VALUES (2)") - con.commit() - backup_id = self.backup_node( - backup_dir, 'node', node, backup_type="page") - - show_pb = self.show_pb(backup_dir, 'node') - self.assertEqual(show_pb[1]['status'], "OK") - self.assertEqual(show_pb[2]['status'], "OK") - - node.stop() - node.cleanup() - tblspc_path_page = os.path.join(node.base_dir, "tblspc_page") - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-T", "%s=%s" % (tblspc_path_new, tblspc_path_page), - "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - result = node.execute("postgres", "SELECT id FROM test OFFSET 1") - self.assertEqual(result[0][0], 2) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_restore_with_tablespace_mapping_2(self): - """recovery using tablespace-mapping option and page backup""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - 
base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - # Full backup - self.backup_node(backup_dir, 'node', node) - self.assertEqual(self.show_pb(backup_dir, 'node')[0]['status'], "OK") - - # Create tablespace - tblspc_path = os.path.join(node.base_dir, "tblspc") - os.makedirs(tblspc_path) - with node.connect("postgres") as con: - con.connection.autocommit = True - con.execute("CREATE TABLESPACE tblspc LOCATION '%s'" % tblspc_path) - con.connection.autocommit = False - con.execute( - "CREATE TABLE tbl AS SELECT * " - "FROM generate_series(0,3) AS integer") - con.commit() - - # First page backup - self.backup_node(backup_dir, 'node', node, backup_type="page") - self.assertEqual(self.show_pb(backup_dir, 'node')[1]['status'], "OK") - self.assertEqual( - self.show_pb(backup_dir, 'node')[1]['backup-mode'], "PAGE") - - # Create tablespace table - with node.connect("postgres") as con: - con.connection.autocommit = True - con.execute("CHECKPOINT") - con.connection.autocommit = False - con.execute("CREATE TABLE tbl1 (a int) TABLESPACE tblspc") - con.execute( - "INSERT INTO tbl1 SELECT * " - "FROM generate_series(0,3) AS integer") - con.commit() - - # Second page backup - backup_id = self.backup_node( - backup_dir, 'node', node, backup_type="page") - self.assertEqual(self.show_pb(backup_dir, 'node')[2]['status'], "OK") - self.assertEqual( - self.show_pb(backup_dir, 'node')[2]['backup-mode'], "PAGE") - - node.stop() - node.cleanup() - - tblspc_path_new = os.path.join(node.base_dir, "tblspc_new") - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-T", "%s=%s" % (tblspc_path, tblspc_path_new), - "--recovery-target-action=promote"]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - node.slow_start() - - count = node.execute("postgres", "SELECT count(*) FROM tbl") - self.assertEqual(count[0][0], 4) - count = node.execute("postgres", "SELECT count(*) FROM tbl1") - self.assertEqual(count[0][0], 4) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_archive_node_backup_stream_restore_to_recovery_time(self): - """ - make node with archiving, make stream backup, - make PITR to Recovery Time - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node( - backup_dir, 'node', node, options=["--stream"]) - node.safe_psql("postgres", "create table t_heap(a int)") - node.safe_psql("postgres", "select pg_switch_xlog()") - node.stop() - node.cleanup() - - recovery_time = self.show_pb( - backup_dir, 'node', backup_id)['recovery-time'] - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", '--time={0}'.format(recovery_time), 
- "--recovery-target-action=promote" - ] - ), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - - result = node.psql("postgres", 'select * from t_heap') - self.assertTrue('does not exist' in result[2].decode("utf-8")) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_archive_node_backup_stream_restore_to_recovery_time(self): - """ - make node with archiving, make stream backup, - make PITR to Recovery Time - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node( - backup_dir, 'node', node, options=["--stream"]) - node.safe_psql("postgres", "create table t_heap(a int)") - node.stop() - node.cleanup() - - recovery_time = self.show_pb( - backup_dir, 'node', backup_id)['recovery-time'] - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", '--time={0}'.format(recovery_time), - "--recovery-target-action=promote" - ] - ), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - result = node.psql("postgres", 'select * from t_heap') - self.assertTrue('does not exist' in result[2].decode("utf-8")) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_archive_node_backup_stream_pitr(self): - """ - make node with archiving, make stream backup, - create table t_heap, make pitr to Recovery Time, - check that t_heap do not exists - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node( - backup_dir, 'node', node, options=["--stream"]) - node.safe_psql("postgres", "create table t_heap(a int)") - node.cleanup() - - recovery_time = self.show_pb( - backup_dir, 'node', backup_id)['recovery-time'] - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", '--time={0}'.format(recovery_time), - "--recovery-target-action=promote" - ] - ), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - node.slow_start() - - result = node.psql("postgres", 'select * from t_heap') - self.assertEqual(True, 'does not exist' in result[2].decode("utf-8")) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_archive_node_backup_archive_pitr_2(self): - """ - make node with archiving, make archive backup, - create table t_heap, make pitr to Recovery Time, - check that t_heap do 
not exists - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node(backup_dir, 'node', node) - if self.paranoia: - pgdata = self.pgdata_content(node.data_dir) - - node.safe_psql("postgres", "create table t_heap(a int)") - node.stop() - node.cleanup() - - recovery_time = self.show_pb( - backup_dir, 'node', backup_id)['recovery-time'] - - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id), - self.restore_node( - backup_dir, 'node', node, - options=[ - "-j", "4", '--time={0}'.format(recovery_time), - "--recovery-target-action=promote" - ] - ), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - if self.paranoia: - pgdata_restored = self.pgdata_content(node.data_dir) - self.compare_pgdata(pgdata, pgdata_restored) - - node.slow_start() - - result = node.psql("postgres", 'select * from t_heap') - self.assertTrue('does not exist' in result[2].decode("utf-8")) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_archive_restore_to_restore_point(self): - """ - make node with archiving, make archive backup, - create table t_heap, make pitr to Recovery Time, - check that t_heap do not exists - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - self.backup_node(backup_dir, 'node', node) - - node.safe_psql( - "postgres", - "create table t_heap as select generate_series(0,10000)") - result = node.safe_psql( - "postgres", - "select * from t_heap") - node.safe_psql( - "postgres", "select pg_create_restore_point('savepoint')") - node.safe_psql( - "postgres", - "create table t_heap_1 as select generate_series(0,10000)") - node.cleanup() - - self.restore_node( - backup_dir, 'node', node, - options=[ - "--recovery-target-name=savepoint", - "--recovery-target-action=promote"]) - - node.slow_start() - - result_new = node.safe_psql("postgres", "select * from t_heap") - res = node.psql("postgres", "select * from t_heap_1") - self.assertEqual( - res[0], 1, - "Table t_heap_1 should not exist in restored instance") - - self.assertEqual(result, result_new) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/retention.py b/tests/retention.py new file mode 100644 index 000000000..0d1c72b41 --- /dev/null +++ b/tests/retention.py @@ -0,0 +1,2537 @@ +import os +import unittest +from datetime import datetime, timedelta +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from time import sleep +from distutils.dir_util import copy_tree + + +module_name = 'retention' + + +class RetentionTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_retention_redundancy_1(self): + """purge backups using redundancy-based retention policy""" 
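+        # Scenario sketch (inferred from the assertions below, not a spec):
+        # with --retention-redundancy=1 only the newest FULL backup and its
+        # incremental descendants are meant to survive, so of the four
+        # backups taken here (FULL, PAGE, FULL, PAGE) the first pair is a
+        # purge candidate and two backups are expected to remain. The
+        # delete_expired() helper is passed '--expired --wal', so the purge
+        # is also expected to trim the WAL archive: max-segno stays the
+        # same, min-segno advances, and every segment still on disk (after
+        # skipping ".backup" files and stripping any compression suffix)
+        # must sort between the new min-segno and max-segno.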
+ fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.set_config( + backup_dir, 'node', options=['--retention-redundancy=1']) + + # Make backups to be purged + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type="page") + # Make backups to be keeped + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type="page") + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + output_before = self.show_archive(backup_dir, 'node', tli=1) + + # Purge backups + log = self.delete_expired( + backup_dir, 'node', options=['--expired', '--wal']) + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + output_after = self.show_archive(backup_dir, 'node', tli=1) + + self.assertEqual( + output_before['max-segno'], + output_after['max-segno']) + + self.assertNotEqual( + output_before['min-segno'], + output_after['min-segno']) + + # Check that WAL segments were deleted + min_wal = output_after['min-segno'] + max_wal = output_after['max-segno'] + + for wal_name in os.listdir(os.path.join(backup_dir, 'wal', 'node')): + if not wal_name.endswith(".backup"): + + if self.archive_compress: + wal_name = wal_name[-27:] + wal_name = wal_name[:-3] + else: + wal_name = wal_name[-24:] + + self.assertTrue(wal_name >= min_wal) + self.assertTrue(wal_name <= max_wal) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_retention_window_2(self): + """purge backups using window-based retention policy""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + with open( + os.path.join( + backup_dir, + 'backups', + 'node', + "pg_probackup.conf"), "a") as conf: + conf.write("retention-redundancy = 1\n") + conf.write("retention-window = 1\n") + + # Make backups to be purged + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type="page") + # Make backup to be keeped + self.backup_node(backup_dir, 'node', node) + + backups = os.path.join(backup_dir, 'backups', 'node') + days_delta = 5 + for backup in os.listdir(backups): + if backup == 'pg_probackup.conf': + continue + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=days_delta))) + days_delta -= 1 + + # Make backup to be keeped + self.backup_node(backup_dir, 'node', node, backup_type="page") + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + # Purge backups + self.delete_expired(backup_dir, 'node', options=['--expired']) + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_retention_window_3(self): + """purge all backups using window-based retention policy""" + fname = 
self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # take FULL BACKUP + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + # Take second FULL BACKUP + backup_id_2 = self.backup_node(backup_dir, 'node', node) + + # Take third FULL BACKUP + backup_id_3 = self.backup_node(backup_dir, 'node', node) + + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup == 'pg_probackup.conf': + continue + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + # Purge backups + self.delete_expired( + backup_dir, 'node', options=['--retention-window=1', '--expired']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 0) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + # count wal files in ARCHIVE + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_retention_window_4(self): + """purge all backups using window-based retention policy""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # take FULL BACKUPs + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + backup_id_2 = self.backup_node(backup_dir, 'node', node) + + backup_id_3 = self.backup_node(backup_dir, 'node', node) + + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup == 'pg_probackup.conf': + continue + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + self.delete_pb(backup_dir, 'node', backup_id_2) + self.delete_pb(backup_dir, 'node', backup_id_3) + + # Purge backups + self.delete_expired( + backup_dir, 'node', + options=['--retention-window=1', '--expired', '--wal']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 0) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + # count wal files in ARCHIVE + wals_dir = os.path.join(backup_dir, 'wal', 'node') + # n_wals = len(os.listdir(wals_dir)) + + # self.assertTrue(n_wals > 0) + + # self.delete_expired( + # backup_dir, 'node', + # options=['--retention-window=1', '--expired', '--wal']) + + # count again + n_wals = len(os.listdir(wals_dir)) + self.assertTrue(n_wals == 0) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_expire_interleaved_incremental_chains(self): + """complicated case of interleaved backup chains""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 
'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULLb backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # FULLb ERROR + # FULLa OK + + # Take PAGEa1 backup + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change FULLb backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 and FULLa to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa ERROR + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa ERROR + + # Now we start to play with first generation of PAGE backups + # Change PAGEb1 and FULLb to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # Change PAGEa1 and FULLa to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEa2 and FULLa to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + + # Change PAGEb1 and FULLb to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGEa2 and FULla to OK + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup not in [page_id_a2, page_id_b2, 'pg_probackup.conf']: + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + self.delete_expired( + backup_dir, 'node', + options=['--retention-window=1', '--expired']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 6) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_redundancy_expire_interleaved_incremental_chains(self): + """complicated case of interleaved backup chains""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + 
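+        # Scenario sketch for the redundancy-based purge exercised below
+        # (behaviour inferred from the final assertion, not a spec): two
+        # interleaved incremental chains are built, FULLa/PAGEa1/PAGEa2 and
+        # FULLb/PAGEb1/PAGEb2, by flipping backup statuses between OK and
+        # ERROR with change_backup_status() so that each PAGE backup attaches
+        # to the chain that is valid at the time it is taken. Once every
+        # backup is set back to OK, delete_expired() with
+        # '--retention-redundancy=1 --expired' should keep only the newest
+        # FULL backup and its descendants, which is why exactly 3 of the 6
+        # backups are expected to remain.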
node.slow_start() + + # take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULL B backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # FULLb ERROR + # FULLa OK + # Take PAGEa1 backup + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change FULLb backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 and FULLa backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa ERROR + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa ERROR + + # Now we start to play with first generation of PAGE backups + # Change PAGEb1 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # Change PAGEa1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEa2 and FULLa status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + + # Change PAGEb1 and FULLb status to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGEa2 and FULLa status to OK + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + self.delete_expired( + backup_dir, 'node', + options=['--retention-redundancy=1', '--expired']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 3) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_merge_interleaved_incremental_chains(self): + """complicated case of interleaved backup chains""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULLb backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # FULLb ERROR + # FULLa OK + + # Take PAGEa1 backup + page_id_a1 = self.backup_node( + backup_dir, 'node', node, 
backup_type='page') + + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change FULLb to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + # Now we start to play with first generation of PAGE backups + # Change PAGEb1 and FULLb to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # Change PAGEa1 to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEa2 and FULLa to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + + # Change PAGEb1 and FULLb to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGEa2 and FULLa to OK + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup not in [page_id_a2, page_id_b2, 'pg_probackup.conf']: + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + output = self.delete_expired( + backup_dir, 'node', + options=['--retention-window=1', '--expired', '--merge-expired']) + + self.assertIn( + "Merge incremental chain between full backup {0} and backup {1}".format( + backup_id_a, page_id_a2), + output) + + self.assertIn( + "Rename merged full backup {0} to {1}".format( + backup_id_a, page_id_a2), output) + + self.assertIn( + "Merge incremental chain between full backup {0} and backup {1}".format( + backup_id_b, page_id_b2), + output) + + self.assertIn( + "Rename merged full backup {0} to {1}".format( + backup_id_b, page_id_b2), output) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_merge_interleaved_incremental_chains_1(self): + """ + PAGEb3 + PAGEb2 + PAGEb1 + PAGEa1 + FULLb + FULLa + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum':'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=5) + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + pgbench = 
node.pgbench(options=['-t', '20', '-c', '1']) + pgbench.wait() + + backup_id_b = self.backup_node(backup_dir, 'node', node) + pgbench = node.pgbench(options=['-t', '20', '-c', '1']) + pgbench.wait() + + # Change FULL B backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgdata_a1 = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-t', '20', '-c', '1']) + pgbench.wait() + + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + # Change FULL B backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-t', '20', '-c', '1']) + pgbench.wait() + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-t', '20', '-c', '1']) + pgbench.wait() + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + pgdata_b3 = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-t', '20', '-c', '1']) + pgbench.wait() + + # PAGEb3 OK + # PAGEb2 OK + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # PAGEb3 OK + # PAGEb2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup in [page_id_a1, page_id_b3, 'pg_probackup.conf']: + continue + + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + output = self.delete_expired( + backup_dir, 'node', + options=['--retention-window=1', '--expired', '--merge-expired']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['id'], + page_id_b3) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['id'], + page_id_a1) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['backup-mode'], + 'FULL') + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['backup-mode'], + 'FULL') + + node.cleanup() + + # Data correctness of PAGEa3 + self.restore_node(backup_dir, 'node', node, backup_id=page_id_a1) + pgdata_restored_a1 = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata_a1, pgdata_restored_a1) + + node.cleanup() + + # Data correctness of PAGEb3 + self.restore_node(backup_dir, 'node', node, backup_id=page_id_b3) + pgdata_restored_b3 = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata_b3, pgdata_restored_b3) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_basic_window_merge_multiple_descendants(self): + """ + PAGEb3 + | PAGEa3 + -----------------------------retention window + PAGEb2 / + | PAGEa2 / should be deleted + PAGEb1 \ / + | PAGEa1 + FULLb | + FULLa + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, 
module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + backup_id_b = self.backup_node(backup_dir, 'node', node) + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # Change FULLb backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # Change FULLb to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # Change PAGEa1 to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # Change PAGEb1 and FULLb to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEb1 and FULLb to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa2 and FULLa to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + # Change PAGEb2 and PAGEb1 to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b2, 'ERROR') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # and FULL stuff + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # PAGEa3 OK + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEa3 to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a3, 'ERROR') + + # Change PAGEb2, PAGEb1 and FULLb to OK + self.change_backup_status(backup_dir, 'node', page_id_b2, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + page_id_b3 = 
self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb3 OK + # PAGEa3 ERROR + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEa3, PAGEa2 and PAGEb1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a3, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # PAGEb3 OK + # PAGEa3 OK + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Check that page_id_a3 and page_id_a2 are both direct descendants of page_id_a1 + self.assertEqual( + self.show_pb( + backup_dir, 'node', backup_id=page_id_a3)['parent-backup-id'], + page_id_a1) + + self.assertEqual( + self.show_pb( + backup_dir, 'node', backup_id=page_id_a2)['parent-backup-id'], + page_id_a1) + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup in [page_id_a3, page_id_b3, 'pg_probackup.conf']: + continue + + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + output = self.delete_expired( + backup_dir, 'node', + options=[ + '--retention-window=1', '--delete-expired', + '--merge-expired', '--log-level-console=log']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + # Merging chain A + self.assertIn( + "Merge incremental chain between full backup {0} and backup {1}".format( + backup_id_a, page_id_a3), + output) + + self.assertIn( + "INFO: Rename merged full backup {0} to {1}".format( + backup_id_a, page_id_a3), output) + +# self.assertIn( +# "WARNING: Backup {0} has multiple valid descendants. 
" +# "Automatic merge is not possible.".format( +# page_id_a1), output) + + self.assertIn( + "LOG: Consider backup {0} for purge".format( + page_id_a2), output) + + # Merge chain B + self.assertIn( + "Merge incremental chain between full backup {0} and backup {1}".format( + backup_id_b, page_id_b3), + output) + + self.assertIn( + "INFO: Rename merged full backup {0} to {1}".format( + backup_id_b, page_id_b3), output) + + self.assertIn( + "Delete: {0}".format(page_id_a2), output) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['id'], + page_id_b3) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['id'], + page_id_a3) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['backup-mode'], + 'FULL') + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['backup-mode'], + 'FULL') + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.skip("skip") + def test_basic_window_merge_multiple_descendants_1(self): + """ + PAGEb3 + | PAGEa3 + -----------------------------retention window + PAGEb2 / + | PAGEa2 / + PAGEb1 \ / + | PAGEa1 + FULLb | + FULLa + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + backup_id_b = self.backup_node(backup_dir, 'node', node) + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # Change FULLb backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # Change FULLb to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # Change PAGEa1 to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # Change PAGEb1 and FULLb to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEb1 and FULLb to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa2 and FULLa to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR') + + # PAGEa2 ERROR + # PAGEb1 OK 
+ # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa ERROR + + # Change PAGEb2 and PAGEb1 to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b2, 'ERROR') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # and FULL stuff + self.change_backup_status(backup_dir, 'node', backup_id_a, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + page_id_a3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + # pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + # pgbench.wait() + + # PAGEa3 OK + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + + # Change PAGEa3 to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a3, 'ERROR') + + # Change PAGEb2, PAGEb1 and FULLb to OK + self.change_backup_status(backup_dir, 'node', page_id_b2, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb3 OK + # PAGEa3 ERROR + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEa3, PAGEa2 and PAGEb1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a3, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # PAGEb3 OK + # PAGEa3 OK + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Check that page_id_a3 and page_id_a2 are both direct descendants of page_id_a1 + self.assertEqual( + self.show_pb( + backup_dir, 'node', backup_id=page_id_a3)['parent-backup-id'], + page_id_a1) + + self.assertEqual( + self.show_pb( + backup_dir, 'node', backup_id=page_id_a2)['parent-backup-id'], + page_id_a1) + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup in [page_id_a3, page_id_b3, 'pg_probackup.conf']: + continue + + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + output = self.delete_expired( + backup_dir, 'node', + options=[ + '--retention-window=1', + '--merge-expired', '--log-level-console=log']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 3) + + # Merging chain A + self.assertIn( + "Merge incremental chain between full backup {0} and backup {1}".format( + backup_id_a, page_id_a3), + output) + + self.assertIn( + "INFO: Rename merged full backup {0} to {1}".format( + backup_id_a, page_id_a3), output) + +# self.assertIn( +# "WARNING: Backup {0} has multiple valid descendants. 
" +# "Automatic merge is not possible.".format( +# page_id_a1), output) + + # Merge chain B + self.assertIn( + "Merge incremental chain between full backup {0} and backup {1}".format( + backup_id_b, page_id_b3), output) + + self.assertIn( + "INFO: Rename merged full backup {0} to {1}".format( + backup_id_b, page_id_b3), output) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[2]['id'], + page_id_b3) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['id'], + page_id_a3) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['id'], + page_id_a2) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[2]['backup-mode'], + 'FULL') + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['backup-mode'], + 'FULL') + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['backup-mode'], + 'PAGE') + + output = self.delete_expired( + backup_dir, 'node', + options=[ + '--retention-window=1', + '--delete-expired', '--log-level-console=log']) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.skip("skip") + def test_window_chains(self): + """ + PAGE + -------window + PAGE + PAGE + FULL + PAGE + PAGE + FULL + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # Chain A + backup_id_a = self.backup_node(backup_dir, 'node', node) + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Chain B + backup_id_b = self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup in [page_id_b3, 'pg_probackup.conf']: + continue + + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + output = self.delete_expired( + backup_dir, 'node', + options=[ + '--retention-window=1', '--expired', + '--merge-expired', '--log-level-console=log']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 1) + + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_chains_1(self): + """ + PAGE + -------window + PAGE + PAGE + FULL + PAGE + PAGE + FULL + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + 
initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # Chain A + backup_id_a = self.backup_node(backup_dir, 'node', node) + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Chain B + backup_id_b = self.backup_node(backup_dir, 'node', node) + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup in [page_id_b3, 'pg_probackup.conf']: + continue + + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + output = self.delete_expired( + backup_dir, 'node', + options=[ + '--retention-window=1', + '--merge-expired', '--log-level-console=log']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + self.assertIn( + "There are no backups to delete by retention policy", + output) + + self.assertIn( + "Retention merging finished", + output) + + output = self.delete_expired( + backup_dir, 'node', + options=[ + '--retention-window=1', + '--expired', '--log-level-console=log']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 1) + + self.assertIn( + "There are no backups to merge by retention policy", + output) + + self.assertIn( + "Purging finished", + output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_window_error_backups(self): + """ + PAGE ERROR + -------window + PAGE ERROR + PAGE ERROR + PAGE ERROR + FULL ERROR + FULL + -------redundancy + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUPs + backup_id_a1 = self.backup_node(backup_dir, 'node', node) + gdb = self.backup_node( + backup_dir, 'node', node, backup_type='page', gdb=True) + + page_id_a3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change FULLb backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_error_backups_1(self): + """ + DELTA + PAGE ERROR + FULL + -------window + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUP + full_id = self.backup_node(backup_dir, 
'node', node) + + # Take PAGE BACKUP + gdb = self.backup_node( + backup_dir, 'node', node, backup_type='page', gdb=True) + + gdb.set_breakpoint('pg_stop_backup') + gdb.run_until_break() + gdb.remove_all_breakpoints() + gdb._execute('signal SIGINT') + gdb.continue_execution_until_error() + + page_id = self.show_pb(backup_dir, 'node')[1]['id'] + + # Take DELTA backup + delta_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', + options=['--retention-window=2', '--delete-expired']) + + # Take FULL BACKUP + full2_id = self.backup_node(backup_dir, 'node', node) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_error_backups_2(self): + """ + DELTA + PAGE ERROR + FULL + -------window + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUP + full_id = self.backup_node(backup_dir, 'node', node) + + # Take PAGE BACKUP + gdb = self.backup_node( + backup_dir, 'node', node, backup_type='page', gdb=True) + + gdb.set_breakpoint('pg_stop_backup') + gdb.run_until_break() + gdb._execute('signal SIGKILL') + gdb.continue_execution_until_error() + + page_id = self.show_pb(backup_dir, 'node')[1]['id'] + + if self.get_version(node) < 90600: + node.safe_psql( + 'postgres', + 'SELECT pg_catalog.pg_stop_backup()') + + # Take DELTA backup + delta_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', + options=['--retention-window=2', '--delete-expired']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 3) + + # Clean after yourself + # self.del_test_dir(module_name, fname) + + def test_retention_redundancy_overlapping_chains(self): + """""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + if self.get_version(node) < 90600: + self.del_test_dir(module_name, fname) + return unittest.skip('Skipped because ptrack support is disabled') + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.set_config( + backup_dir, 'node', options=['--retention-redundancy=1']) + + # Make backups to be purged + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type="page") + + # Make backups to be keeped + gdb = self.backup_node(backup_dir, 'node', node, gdb=True) + gdb.set_breakpoint('backup_files') + gdb.run_until_break() + + sleep(1) + + self.backup_node(backup_dir, 'node', node, backup_type="page") + + gdb.remove_all_breakpoints() + gdb.continue_execution_until_exit() + + self.backup_node(backup_dir, 'node', node, backup_type="page") + + # Purge backups + log = self.delete_expired( + backup_dir, 'node', options=['--expired', '--wal']) + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + self.validate_pb(backup_dir, 'node') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_retention_redundancy_overlapping_chains(self): + """""" + fname = self.id().split('.')[3] + 
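+        # Note: this method duplicates the identical definition above; at class
+        # creation Python keeps only this later copy, so only one of them runs.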
node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + if self.get_version(node) < 90600: + self.del_test_dir(module_name, fname) + return unittest.skip('Skipped because ptrack support is disabled') + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.set_config( + backup_dir, 'node', options=['--retention-redundancy=1']) + + # Make backups to be purged + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type="page") + + # Make backups to be keeped + gdb = self.backup_node(backup_dir, 'node', node, gdb=True) + gdb.set_breakpoint('backup_files') + gdb.run_until_break() + + sleep(1) + + self.backup_node(backup_dir, 'node', node, backup_type="page") + + gdb.remove_all_breakpoints() + gdb.continue_execution_until_exit() + + self.backup_node(backup_dir, 'node', node, backup_type="page") + + # Purge backups + log = self.delete_expired( + backup_dir, 'node', options=['--expired', '--wal']) + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + self.validate_pb(backup_dir, 'node') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_wal_purge_victim(self): + """ + https://github.com/postgrespro/pg_probackup/issues/103 + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Make ERROR incremental backup + try: + self.backup_node(backup_dir, 'node', node, backup_type='page') + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because page backup should not be possible " + "without valid full backup.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "WARNING: Valid backup on current timeline 1 is not found" in e.message and + "ERROR: Create new full backup before an incremental one" in e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + page_id = self.show_pb(backup_dir, 'node')[0]['id'] + + sleep(1) + + # Make FULL backup + full_id = self.backup_node(backup_dir, 'node', node, options=['--delete-wal']) + + try: + self.validate_pb(backup_dir, 'node') + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because page backup should not be possible " + "without valid full backup.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "INFO: Backup {0} WAL segments are valid".format(full_id), + e.message) + self.assertIn( + "WARNING: Backup {0} has missing parent 0".format(page_id), + e.message) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_failed_merge_redundancy_retention(self): + """ + Check that retention purge works correctly with MERGING backups + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = 
self.make_simple_node( + base_dir=os.path.join( + module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL1 backup + full_id = self.backup_node(backup_dir, 'node', node) + + # DELTA BACKUP + delta_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # FULL2 backup + self.backup_node(backup_dir, 'node', node) + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # FULL3 backup + self.backup_node(backup_dir, 'node', node) + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + self.set_config( + backup_dir, 'node', options=['--retention-redundancy=2']) + + self.set_config( + backup_dir, 'node', options=['--retention-window=2']) + + # create pair of MERGING backup as a result of failed merge + gdb = self.merge_backup( + backup_dir, 'node', delta_id, gdb=True) + gdb.set_breakpoint('backup_non_data_file') + gdb.run_until_break() + gdb.continue_execution_until_break(2) + gdb._execute('signal SIGKILL') + + # "expire" first full backup + backups = os.path.join(backup_dir, 'backups', 'node') + with open( + os.path.join( + backups, full_id, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + # run retention merge + self.delete_expired( + backup_dir, 'node', options=['--delete-expired']) + + self.assertEqual( + 'MERGING', + self.show_pb(backup_dir, 'node', full_id)['status'], + 'Backup STATUS should be "MERGING"') + + self.assertEqual( + 'MERGING', + self.show_pb(backup_dir, 'node', delta_id)['status'], + 'Backup STATUS should be "MERGING"') + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 10) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_wal_depth_1(self): + """ + |-------------B5----------> WAL timeline3 + |-----|-------------------------> WAL timeline2 + B1 B2---| B3 B4-------B6-----> WAL timeline1 + + wal-depth=2 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'archive_timeout': '30s', + 'checkpoint_timeout': '30s', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + + self.set_config(backup_dir, 'node', options=['--archive-timeout=60s']) + + node.slow_start() + + # FULL + node.pgbench_init(scale=1) + B1 = self.backup_node(backup_dir, 'node', node) + + # PAGE + node.pgbench_init(scale=1) + B2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # generate_some more data + node.pgbench_init(scale=1) + + target_xid = node.safe_psql( + "postgres", + "select txid_current()").rstrip() + + node.pgbench_init(scale=1) + + B3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + 
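+        # The first restore below branches WAL timeline 2 at target_xid, which
+        # was captured between B2 and B3 above; B4 and B6 stay on timeline 1.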
node.pgbench_init(scale=1) + + B4 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Timeline 2 + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + output = self.restore_node( + backup_dir, 'node', node_restored, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-action=promote']) + + self.assertIn( + 'Restore of backup {0} completed'.format(B2), + output) + + self.set_auto_conf(node_restored, options={'port': node_restored.port}) + + node_restored.slow_start() + + node_restored.pgbench_init(scale=1) + + target_xid = node_restored.safe_psql( + "postgres", + "select txid_current()").rstrip() + + node_restored.pgbench_init(scale=2) + + # Timeline 3 + node_restored.cleanup() + + output = self.restore_node( + backup_dir, 'node', node_restored, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-timeline=2', + '--recovery-target-action=promote']) + + self.assertIn( + 'Restore of backup {0} completed'.format(B2), + output) + + self.set_auto_conf(node_restored, options={'port': node_restored.port}) + + node_restored.slow_start() + + node_restored.pgbench_init(scale=1) + B5 = self.backup_node( + backup_dir, 'node', node_restored, data_dir=node_restored.data_dir) + + node.pgbench_init(scale=1) + B6 = self.backup_node(backup_dir, 'node', node) + + lsn = self.show_archive(backup_dir, 'node', tli=2)['switchpoint'] + + self.validate_pb( + backup_dir, 'node', backup_id=B2, + options=['--recovery-target-lsn={0}'.format(lsn)]) + + self.validate_pb(backup_dir, 'node') + + self.del_test_dir(module_name, fname) + + def test_wal_purge(self): + """ + -------------------------------------> tli5 + ---------------------------B6--------> tli4 + S2`---------------> tli3 + S1`------------S2---B4-------B5--> tli2 + B1---S1-------------B2--------B3------> tli1 + + B* - backups + S* - switchpoints + + Expected result: + TLI5 will be purged entirely + B6--------> tli4 + S2`---------------> tli3 + S1`------------S2---B4-------B5--> tli2 + B1---S1-------------B2--------B3------> tli1 + + wal-depth=2 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_config(backup_dir, 'node', options=['--archive-timeout=60s']) + + node.slow_start() + + # STREAM FULL + stream_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + node.stop() + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL + B1 = self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=1) + + target_xid = node.safe_psql( + "postgres", + "select txid_current()").rstrip() + node.pgbench_init(scale=5) + + # B2 FULL on TLI1 + B2 = self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=4) + B3 = self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=4) + + # TLI 2 + node_tli2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_tli2')) + node_tli2.cleanup() + + output = self.restore_node( + backup_dir, 'node', node_tli2, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-timeline=1'.format(target_xid), + '--recovery-target-action=promote']) + + self.assertIn( + 'INFO: Restore of 
backup {0} completed'.format(B1), + output) + + self.set_auto_conf(node_tli2, options={'port': node_tli2.port}) + node_tli2.slow_start() + node_tli2.pgbench_init(scale=4) + + target_xid = node_tli2.safe_psql( + "postgres", + "select txid_current()").rstrip() + node_tli2.pgbench_init(scale=1) + + B4 = self.backup_node( + backup_dir, 'node', node_tli2, data_dir=node_tli2.data_dir) + node_tli2.pgbench_init(scale=3) + + B5 = self.backup_node( + backup_dir, 'node', node_tli2, data_dir=node_tli2.data_dir) + node_tli2.pgbench_init(scale=1) + node_tli2.cleanup() + + # TLI3 + node_tli3 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_tli3')) + node_tli3.cleanup() + + # Note, that successful validation here is a happy coincidence + output = self.restore_node( + backup_dir, 'node', node_tli3, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-timeline=2', + '--recovery-target-action=promote']) + + self.assertIn( + 'INFO: Restore of backup {0} completed'.format(B1), + output) + self.set_auto_conf(node_tli3, options={'port': node_tli3.port}) + node_tli3.slow_start() + node_tli3.pgbench_init(scale=5) + node_tli3.cleanup() + + # TLI4 + node_tli4 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_tli4')) + node_tli4.cleanup() + + self.restore_node( + backup_dir, 'node', node_tli4, backup_id=stream_id, + options=[ + '--recovery-target=immediate', + '--recovery-target-action=promote']) + + self.set_auto_conf(node_tli4, options={'port': node_tli4.port}) + self.set_archiving(backup_dir, 'node', node_tli4) + node_tli4.slow_start() + + node_tli4.pgbench_init(scale=5) + + B6 = self.backup_node( + backup_dir, 'node', node_tli4, data_dir=node_tli4.data_dir) + node_tli4.pgbench_init(scale=5) + node_tli4.cleanup() + + # TLI5 + node_tli5 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_tli5')) + node_tli5.cleanup() + + self.restore_node( + backup_dir, 'node', node_tli5, backup_id=stream_id, + options=[ + '--recovery-target=immediate', + '--recovery-target-action=promote']) + + self.set_auto_conf(node_tli5, options={'port': node_tli5.port}) + self.set_archiving(backup_dir, 'node', node_tli5) + node_tli5.slow_start() + node_tli5.pgbench_init(scale=10) + + # delete '.history' file of TLI4 + os.remove(os.path.join(backup_dir, 'wal', 'node', '00000004.history')) + # delete '.history' file of TLI5 + os.remove(os.path.join(backup_dir, 'wal', 'node', '00000005.history')) + + output = self.delete_pb( + backup_dir, 'node', + options=[ + '--delete-wal', '--dry-run', + '--log-level-console=verbose']) + + self.assertIn( + 'INFO: On timeline 4 WAL segments between 000000040000000000000002 ' + 'and 000000040000000000000006 can be removed', + output) + + self.assertIn( + 'INFO: On timeline 5 all files can be removed', + output) + + show_tli1_before = self.show_archive(backup_dir, 'node', tli=1) + show_tli2_before = self.show_archive(backup_dir, 'node', tli=2) + show_tli3_before = self.show_archive(backup_dir, 'node', tli=3) + show_tli4_before = self.show_archive(backup_dir, 'node', tli=4) + show_tli5_before = self.show_archive(backup_dir, 'node', tli=5) + + self.assertTrue(show_tli1_before) + self.assertTrue(show_tli2_before) + self.assertTrue(show_tli3_before) + self.assertTrue(show_tli4_before) + self.assertTrue(show_tli5_before) + + output = self.delete_pb( + backup_dir, 'node', + options=['--delete-wal', '--log-level-console=verbose']) + + self.assertIn( + 'INFO: On timeline 4 WAL segments between 000000040000000000000002 
' + 'and 000000040000000000000006 will be removed', + output) + + self.assertIn( + 'INFO: On timeline 5 all files will be removed', + output) + + show_tli1_after = self.show_archive(backup_dir, 'node', tli=1) + show_tli2_after = self.show_archive(backup_dir, 'node', tli=2) + show_tli3_after = self.show_archive(backup_dir, 'node', tli=3) + show_tli4_after = self.show_archive(backup_dir, 'node', tli=4) + show_tli5_after = self.show_archive(backup_dir, 'node', tli=5) + + self.assertEqual(show_tli1_before, show_tli1_after) + self.assertEqual(show_tli2_before, show_tli2_after) + self.assertEqual(show_tli3_before, show_tli3_after) + self.assertNotEqual(show_tli4_before, show_tli4_after) + self.assertNotEqual(show_tli5_before, show_tli5_after) + + self.assertEqual( + show_tli4_before['min-segno'], + '000000040000000000000002') + + self.assertEqual( + show_tli4_after['min-segno'], + '000000040000000000000006') + + self.assertFalse(show_tli5_after) + + self.validate_pb(backup_dir, 'node') + + self.del_test_dir(module_name, fname) + + def test_wal_depth_2(self): + """ + -------------------------------------> tli5 + ---------------------------B6--------> tli4 + S2`---------------> tli3 + S1`------------S2---B4-------B5--> tli2 + B1---S1-------------B2--------B3------> tli1 + + B* - backups + S* - switchpoints + wal-depth=2 + + Expected result: + TLI5 will be purged entirely + B6--------> tli4 + S2`---------------> tli3 + S1`------------S2 B4-------B5--> tli2 + B1---S1 B2--------B3------> tli1 + + wal-depth=2 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_config(backup_dir, 'node', options=['--archive-timeout=60s']) + + node.slow_start() + + # STREAM FULL + stream_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + node.stop() + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL + B1 = self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=1) + + target_xid = node.safe_psql( + "postgres", + "select txid_current()").rstrip() + node.pgbench_init(scale=5) + + # B2 FULL on TLI1 + B2 = self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=4) + B3 = self.backup_node(backup_dir, 'node', node) + node.pgbench_init(scale=4) + + # TLI 2 + node_tli2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_tli2')) + node_tli2.cleanup() + + output = self.restore_node( + backup_dir, 'node', node_tli2, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-timeline=1'.format(target_xid), + '--recovery-target-action=promote']) + + self.assertIn( + 'INFO: Restore of backup {0} completed'.format(B1), + output) + + self.set_auto_conf(node_tli2, options={'port': node_tli2.port}) + node_tli2.slow_start() + node_tli2.pgbench_init(scale=4) + + target_xid = node_tli2.safe_psql( + "postgres", + "select txid_current()").rstrip() + node_tli2.pgbench_init(scale=1) + + B4 = self.backup_node( + backup_dir, 'node', node_tli2, data_dir=node_tli2.data_dir) + node_tli2.pgbench_init(scale=3) + + B5 = self.backup_node( + backup_dir, 'node', node_tli2, data_dir=node_tli2.data_dir) + node_tli2.pgbench_init(scale=1) + node_tli2.cleanup() + + # TLI3 + node_tli3 = self.make_simple_node( + base_dir=os.path.join(module_name, 
fname, 'node_tli3')) + node_tli3.cleanup() + + # Note, that successful validation here is a happy coincidence + output = self.restore_node( + backup_dir, 'node', node_tli3, + options=[ + '--recovery-target-xid={0}'.format(target_xid), + '--recovery-target-timeline=2', + '--recovery-target-action=promote']) + + self.assertIn( + 'INFO: Restore of backup {0} completed'.format(B1), + output) + self.set_auto_conf(node_tli3, options={'port': node_tli3.port}) + node_tli3.slow_start() + node_tli3.pgbench_init(scale=5) + node_tli3.cleanup() + + # TLI4 + node_tli4 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_tli4')) + node_tli4.cleanup() + + self.restore_node( + backup_dir, 'node', node_tli4, backup_id=stream_id, + options=[ + '--recovery-target=immediate', + '--recovery-target-action=promote']) + + self.set_auto_conf(node_tli4, options={'port': node_tli4.port}) + self.set_archiving(backup_dir, 'node', node_tli4) + node_tli4.slow_start() + + node_tli4.pgbench_init(scale=5) + + B6 = self.backup_node( + backup_dir, 'node', node_tli4, data_dir=node_tli4.data_dir) + node_tli4.pgbench_init(scale=5) + node_tli4.cleanup() + + # TLI5 + node_tli5 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_tli5')) + node_tli5.cleanup() + + self.restore_node( + backup_dir, 'node', node_tli5, backup_id=stream_id, + options=[ + '--recovery-target=immediate', + '--recovery-target-action=promote']) + + self.set_auto_conf(node_tli5, options={'port': node_tli5.port}) + self.set_archiving(backup_dir, 'node', node_tli5) + node_tli5.slow_start() + node_tli5.pgbench_init(scale=10) + + # delete '.history' file of TLI4 + os.remove(os.path.join(backup_dir, 'wal', 'node', '00000004.history')) + # delete '.history' file of TLI5 + os.remove(os.path.join(backup_dir, 'wal', 'node', '00000005.history')) + + output = self.delete_pb( + backup_dir, 'node', + options=[ + '--delete-wal', '--dry-run', + '--wal-depth=2', '--log-level-console=verbose']) + + start_lsn_B2 = self.show_pb(backup_dir, 'node', B2)['start-lsn'] + self.assertIn( + 'On timeline 1 WAL is protected from purge at {0}'.format(start_lsn_B2), + output) + + self.assertIn( + 'LOG: Archive backup {0} to stay consistent protect from ' + 'purge WAL interval between 000000010000000000000004 ' + 'and 000000010000000000000005 on timeline 1'.format(B1), output) + + start_lsn_B4 = self.show_pb(backup_dir, 'node', B4)['start-lsn'] + self.assertIn( + 'On timeline 2 WAL is protected from purge at {0}'.format(start_lsn_B4), + output) + + self.assertIn( + 'LOG: Timeline 3 to stay reachable from timeline 1 protect ' + 'from purge WAL interval between 000000020000000000000006 and ' + '000000020000000000000009 on timeline 2', output) + + self.assertIn( + 'LOG: Timeline 3 to stay reachable from timeline 1 protect ' + 'from purge WAL interval between 000000010000000000000004 and ' + '000000010000000000000006 on timeline 1', output) + + show_tli1_before = self.show_archive(backup_dir, 'node', tli=1) + show_tli2_before = self.show_archive(backup_dir, 'node', tli=2) + show_tli3_before = self.show_archive(backup_dir, 'node', tli=3) + show_tli4_before = self.show_archive(backup_dir, 'node', tli=4) + show_tli5_before = self.show_archive(backup_dir, 'node', tli=5) + + self.assertTrue(show_tli1_before) + self.assertTrue(show_tli2_before) + self.assertTrue(show_tli3_before) + self.assertTrue(show_tli4_before) + self.assertTrue(show_tli5_before) + + sleep(5) + + output = self.delete_pb( + backup_dir, 'node', + options=['--delete-wal', '--wal-depth=2', 
'--log-level-console=verbose']) + +# print(output) + + show_tli1_after = self.show_archive(backup_dir, 'node', tli=1) + show_tli2_after = self.show_archive(backup_dir, 'node', tli=2) + show_tli3_after = self.show_archive(backup_dir, 'node', tli=3) + show_tli4_after = self.show_archive(backup_dir, 'node', tli=4) + show_tli5_after = self.show_archive(backup_dir, 'node', tli=5) + + self.assertNotEqual(show_tli1_before, show_tli1_after) + self.assertNotEqual(show_tli2_before, show_tli2_after) + self.assertEqual(show_tli3_before, show_tli3_after) + self.assertNotEqual(show_tli4_before, show_tli4_after) + self.assertNotEqual(show_tli5_before, show_tli5_after) + + self.assertEqual( + show_tli4_before['min-segno'], + '000000040000000000000002') + + self.assertEqual( + show_tli4_after['min-segno'], + '000000040000000000000006') + + self.assertFalse(show_tli5_after) + + self.assertTrue(show_tli1_after['lost-segments']) + self.assertTrue(show_tli2_after['lost-segments']) + self.assertFalse(show_tli3_after['lost-segments']) + self.assertFalse(show_tli4_after['lost-segments']) + self.assertFalse(show_tli5_after) + + self.assertEqual(len(show_tli1_after['lost-segments']), 1) + self.assertEqual(len(show_tli2_after['lost-segments']), 1) + + self.assertEqual( + show_tli1_after['lost-segments'][0]['begin-segno'], + '000000010000000000000007') + + self.assertEqual( + show_tli1_after['lost-segments'][0]['end-segno'], + '00000001000000000000000A') + + self.assertEqual( + show_tli2_after['lost-segments'][0]['begin-segno'], + '00000002000000000000000A') + + self.assertEqual( + show_tli2_after['lost-segments'][0]['end-segno'], + '00000002000000000000000A') + + self.validate_pb(backup_dir, 'node') + + self.del_test_dir(module_name, fname) + + def test_basic_wal_depth(self): + """ + B1---B1----B3-----B4----B5------> tli1 + + Expected result with wal-depth=1: + B1 B1 B3 B4 B5------> tli1 + + wal-depth=1 + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_config(backup_dir, 'node', options=['--archive-timeout=60s']) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL + node.pgbench_init(scale=1) + B1 = self.backup_node(backup_dir, 'node', node) + + + # B2 + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + B2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # B3 + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + B3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # B4 + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + B4 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # B5 + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + B5 = self.backup_node( + backup_dir, 'node', node, backup_type='page', + options=['--wal-depth=1', '--delete-wal']) + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + target_xid = node.safe_psql( + "postgres", + "select txid_current()").rstrip() + + self.switch_wal_segment(node) + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + tli1 = self.show_archive(backup_dir, 'node', tli=1) + + # check that there are 4 lost_segments intervals + 
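+        # Taking B5 with --wal-depth=1 and --delete-wal is expected to purge the
+        # WAL between consecutive backups, leaving a gap after each of B1..B4
+        # (four lost-segment intervals) while keeping WAL from B5 onwards intact.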
self.assertEqual(len(tli1['lost-segments']), 4) + + output = self.validate_pb( + backup_dir, 'node', B5, + options=['--recovery-target-xid={0}'.format(target_xid)]) + + print(output) + + self.assertIn( + 'INFO: Backup validation completed successfully on time', + output) + + self.assertIn( + 'xid {0} and LSN'.format(target_xid), + output) + + for backup_id in [B1, B2, B3, B4]: + try: + self.validate_pb( + backup_dir, 'node', backup_id, + options=['--recovery-target-xid={0}'.format(target_xid)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because page backup should not be possible " + "without valid full backup.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Not enough WAL records to xid {0}".format(target_xid), + e.message) + + self.validate_pb(backup_dir, 'node') + + self.del_test_dir(module_name, fname, [node]) diff --git a/tests/retention_test.py b/tests/retention_test.py deleted file mode 100644 index 652f7c39d..000000000 --- a/tests/retention_test.py +++ /dev/null @@ -1,178 +0,0 @@ -import os -import unittest -from datetime import datetime, timedelta -from .helpers.ptrack_helpers import ProbackupTest - - -module_name = 'retention' - - -class RetentionTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_retention_redundancy_1(self): - """purge backups using redundancy-based retention policy""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - with open(os.path.join( - backup_dir, 'backups', 'node', - "pg_probackup.conf"), "a") as conf: - conf.write("retention-redundancy = 1\n") - - # Make backups to be purged - self.backup_node(backup_dir, 'node', node) - self.backup_node(backup_dir, 'node', node, backup_type="page") - # Make backups to be keeped - self.backup_node(backup_dir, 'node', node) - self.backup_node(backup_dir, 'node', node, backup_type="page") - - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) - - # Purge backups - log = self.delete_expired(backup_dir, 'node') - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) - - # Check that WAL segments were deleted - min_wal = None - max_wal = None - for line in log.splitlines(): - if line.startswith("INFO: removed min WAL segment"): - min_wal = line[31:-1] - elif line.startswith("INFO: removed max WAL segment"): - max_wal = line[31:-1] - - if not min_wal: - self.assertTrue(False, "min_wal is empty") - - if not max_wal: - self.assertTrue(False, "max_wal is not set") - - for wal_name in os.listdir(os.path.join(backup_dir, 'wal', 'node')): - if not wal_name.endswith(".backup"): - # wal_name_b = wal_name.encode('ascii') - self.assertEqual(wal_name[8:] > min_wal[8:], True) - self.assertEqual(wal_name[8:] > max_wal[8:], True) - - # Clean after yourself - self.del_test_dir(module_name, fname) - -# @unittest.skip("123") - def test_retention_window_2(self): - """purge backups using window-based retention policy""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - 
initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - with open( - os.path.join( - backup_dir, - 'backups', - 'node', - "pg_probackup.conf"), "a") as conf: - conf.write("retention-redundancy = 1\n") - conf.write("retention-window = 1\n") - - # Make backups to be purged - self.backup_node(backup_dir, 'node', node) - self.backup_node(backup_dir, 'node', node, backup_type="page") - # Make backup to be keeped - self.backup_node(backup_dir, 'node', node) - - backups = os.path.join(backup_dir, 'backups', 'node') - days_delta = 5 - for backup in os.listdir(backups): - if backup == 'pg_probackup.conf': - continue - with open( - os.path.join( - backups, backup, "backup.control"), "a") as conf: - conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( - datetime.now() - timedelta(days=days_delta))) - days_delta -= 1 - - # Make backup to be keeped - self.backup_node(backup_dir, 'node', node, backup_type="page") - - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) - - # Purge backups - self.delete_expired(backup_dir, 'node') - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) - - # Clean after yourself - self.del_test_dir(module_name, fname) - -# @unittest.skip("123") - def test_retention_wal(self): - """purge backups using window-based retention policy""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.safe_psql( - "postgres", - "create table t_heap as select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,100500) i") - - # Take FULL BACKUP - self.backup_node(backup_dir, 'node', node) - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,100500) i") - - self.backup_node(backup_dir, 'node', node) - - backups = os.path.join(backup_dir, 'backups', 'node') - days_delta = 5 - for backup in os.listdir(backups): - if backup == 'pg_probackup.conf': - continue - with open( - os.path.join( - backups, backup, "backup.control"), "a") as conf: - conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( - datetime.now() - timedelta(days=days_delta))) - days_delta -= 1 - - # Make backup to be keeped - self.backup_node(backup_dir, 'node', node, backup_type="page") - - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 3) - - # Purge backups - self.delete_expired( - backup_dir, 'node', options=['--retention-window=2']) - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/set_backup.py b/tests/set_backup.py new file mode 100644 index 000000000..db039c92d --- /dev/null +++ b/tests/set_backup.py @@ -0,0 +1,476 @@ +import unittest +import subprocess +import os +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from sys import exit +from datetime import datetime, timedelta + +module_name = 'set_backup' + + 
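+# Sanity tests for the set-backup command: --ttl / --expire-time argument
+# validation, backup pinning vs. retention and WAL purge, and --note handling.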
+class SetBackupTest(ProbackupTest, unittest.TestCase): + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_set_backup_sanity(self): + """general sanity for set-backup command""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + recovery_time = self.show_pb( + backup_dir, 'node', backup_id=backup_id)['recovery-time'] + + expire_time_1 = "{:%Y-%m-%d %H:%M:%S}".format( + datetime.now() + timedelta(days=5)) + + try: + self.set_backup(backup_dir, False, options=['--ttl=30d']) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of missing instance. " + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: required parameter not specified: --instance', + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + try: + self.set_backup( + backup_dir, 'node', + options=[ + "--ttl=30d", + "--expire-time='{0}'".format(expire_time_1)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because options cannot be mixed. " + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: You cannot specify '--expire-time' " + "and '--ttl' options together", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + try: + self.set_backup(backup_dir, 'node', options=["--ttl=30d"]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because of missing backup_id. 
" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: You must specify parameter (-i, --backup-id) " + "for 'set-backup' command", + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.set_backup( + backup_dir, 'node', backup_id, options=["--ttl=30d"]) + + actual_expire_time = self.show_pb( + backup_dir, 'node', backup_id=backup_id)['expire-time'] + + self.assertNotEqual(expire_time_1, actual_expire_time) + + expire_time_2 = "{:%Y-%m-%d %H:%M:%S}".format( + datetime.now() + timedelta(days=6)) + + self.set_backup( + backup_dir, 'node', backup_id, + options=["--expire-time={0}".format(expire_time_2)]) + + actual_expire_time = self.show_pb( + backup_dir, 'node', backup_id=backup_id)['expire-time'] + + self.assertIn(expire_time_2, actual_expire_time) + + # unpin backup + self.set_backup( + backup_dir, 'node', backup_id, options=["--ttl=0"]) + + attr_list = self.show_pb( + backup_dir, 'node', backup_id=backup_id) + + self.assertNotIn('expire-time', attr_list) + + self.set_backup( + backup_dir, 'node', backup_id, options=["--expire-time={0}".format(recovery_time)]) + + # parse string to datetime object + #new_expire_time = datetime.strptime(new_expire_time, '%Y-%m-%d %H:%M:%S%z') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_retention_redundancy_pinning(self): + """""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + with open(os.path.join( + backup_dir, 'backups', 'node', + "pg_probackup.conf"), "a") as conf: + conf.write("retention-redundancy = 1\n") + + self.set_config( + backup_dir, 'node', options=['--retention-redundancy=1']) + + # Make backups to be purged + full_id = self.backup_node(backup_dir, 'node', node) + page_id = self.backup_node( + backup_dir, 'node', node, backup_type="page") + # Make backups to be keeped + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type="page") + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + self.set_backup( + backup_dir, 'node', page_id, options=['--ttl=5d']) + + # Purge backups + log = self.delete_expired( + backup_dir, 'node', + options=['--delete-expired', '--log-level-console=LOG']) + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + self.assertIn('Time Window: 0d/5d', log) + self.assertIn( + 'LOG: Backup {0} is pinned until'.format(page_id), + log) + self.assertIn( + 'LOG: Retain backup {0} because his descendant ' + '{1} is guarded by retention'.format(full_id, page_id), + log) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_retention_window_pinning(self): + """purge all backups using window-based retention policy""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + 
node.slow_start() + + # take FULL BACKUP + backup_id_1 = self.backup_node(backup_dir, 'node', node) + page1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Take second FULL BACKUP + backup_id_2 = self.backup_node(backup_dir, 'node', node) + page2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Take third FULL BACKUP + backup_id_3 = self.backup_node(backup_dir, 'node', node) + page2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup == 'pg_probackup.conf': + continue + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + self.set_backup( + backup_dir, 'node', page1, options=['--ttl=30d']) + + # Purge backups + out = self.delete_expired( + backup_dir, 'node', + options=[ + '--log-level-console=LOG', + '--retention-window=1', + '--delete-expired']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + self.assertIn( + 'LOG: Backup {0} is pinned until'.format(page1), out) + + self.assertIn( + 'LOG: Retain backup {0} because his descendant ' + '{1} is guarded by retention'.format(backup_id_1, page1), + out) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_wal_retention_and_pinning(self): + """ + B1---B2---P---B3---> + wal-depth=2 + P - pinned backup + + expected result after WAL purge: + B1 B2---P---B3---> + + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # take FULL BACKUP + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + node.pgbench_init(scale=1) + + # Take PAGE BACKUP + self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=['--stream']) + + node.pgbench_init(scale=1) + + # Take DELTA BACKUP and pin it + expire_time = "{:%Y-%m-%d %H:%M:%S}".format( + datetime.now() + timedelta(days=6)) + backup_id_pinned = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=[ + '--stream', + '--expire-time={0}'.format(expire_time)]) + + node.pgbench_init(scale=1) + + # Take second PAGE BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta', options=['--stream']) + + node.pgbench_init(scale=1) + + # Purge backups + out = self.delete_expired( + backup_dir, 'node', + options=[ + '--log-level-console=LOG', + '--delete-wal', '--wal-depth=2']) + + # print(out) + self.assertIn( + 'Pinned backup {0} is ignored for the ' + 'purpose of WAL retention'.format(backup_id_pinned), + out) + + for instance in self.show_archive(backup_dir): + timelines = instance['timelines'] + + # sanity + for timeline in timelines: + self.assertEqual( + timeline['min-segno'], + '000000010000000000000004') + self.assertEqual(timeline['status'], 'OK') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_wal_retention_and_pinning_1(self): + """ + P---B1---> + wal-depth=2 + P - pinned backup + + expected result after WAL purge: + P---B1---> + + """ + fname = 
self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + expire_time = "{:%Y-%m-%d %H:%M:%S}".format( + datetime.now() + timedelta(days=6)) + + # take FULL BACKUP + backup_id_pinned = self.backup_node( + backup_dir, 'node', node, + options=['--expire-time={0}'.format(expire_time)]) + + node.pgbench_init(scale=2) + + # Take second PAGE BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + node.pgbench_init(scale=2) + + # Purge backups + out = self.delete_expired( + backup_dir, 'node', + options=[ + '--log-level-console=verbose', + '--delete-wal', '--wal-depth=2']) + + print(out) + self.assertIn( + 'Pinned backup {0} is ignored for the ' + 'purpose of WAL retention'.format(backup_id_pinned), + out) + + for instance in self.show_archive(backup_dir): + timelines = instance['timelines'] + + # sanity + for timeline in timelines: + self.assertEqual( + timeline['min-segno'], + '000000010000000000000002') + self.assertEqual(timeline['status'], 'OK') + + self.validate_pb(backup_dir) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_add_note_newlines(self): + """""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # FULL + backup_id = self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--note={0}'.format('hello\nhello')]) + + backup_meta = self.show_pb(backup_dir, 'node', backup_id) + self.assertEqual(backup_meta['note'], "hello") + + self.set_backup(backup_dir, 'node', backup_id, options=['--note=hello\nhello']) + + backup_meta = self.show_pb(backup_dir, 'node', backup_id) + self.assertEqual(backup_meta['note'], "hello") + + self.set_backup(backup_dir, 'node', backup_id, options=['--note=none']) + + backup_meta = self.show_pb(backup_dir, 'node', backup_id) + self.assertNotIn('note', backup_meta) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_add_big_note(self): + """""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + +# note = node.safe_psql( +# "postgres", +# "SELECT repeat('hello', 400)").rstrip() # TODO: investigate + + note = node.safe_psql( + "postgres", + "SELECT repeat('hello', 210)").rstrip() + + # FULL + try: + self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--note={0}'.format(note)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because note is too large " + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Backup note cannot exceed 1024 bytes", + e.message, + "\n 
Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + note = node.safe_psql( + "postgres", + "SELECT repeat('hello', 200)").rstrip() + + backup_id = self.backup_node( + backup_dir, 'node', node, + options=['--stream', '--note={0}'.format(note)]) + + backup_meta = self.show_pb(backup_dir, 'node', backup_id) + self.assertEqual(backup_meta['note'], note) + + # Clean after yourself + self.del_test_dir(module_name, fname) \ No newline at end of file diff --git a/tests/show.py b/tests/show.py new file mode 100644 index 000000000..0da95dcbb --- /dev/null +++ b/tests/show.py @@ -0,0 +1,540 @@ +import os +import unittest +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException + + +module_name = 'show' + + +class OptionTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_show_1(self): + """Status DONE and OK""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.assertEqual( + self.backup_node( + backup_dir, 'node', node, + options=["--log-level-console=off"]), + None + ) + self.assertIn("OK", self.show_pb(backup_dir, 'node', as_text=True)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_show_json(self): + """Status DONE and OK""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.assertEqual( + self.backup_node( + backup_dir, 'node', node, + options=["--log-level-console=off"]), + None + ) + self.backup_node(backup_dir, 'node', node) + self.assertIn("OK", self.show_pb(backup_dir, 'node', as_text=True)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_corrupt_2(self): + """Status CORRUPT""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + + # delete file which belong to backup + file = os.path.join( + backup_dir, "backups", "node", + backup_id, "database", "postgresql.conf") + os.remove(file) + + try: + self.validate_pb(backup_dir, 'node', backup_id) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because backup corrupted." 
+ " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd + ) + ) + except ProbackupException as e: + self.assertIn( + 'data files are corrupted', + e.message, + '\n Unexpected Error Message: {0}\n' + ' CMD: {1}'.format(repr(e.message), self.cmd) + ) + self.assertIn("CORRUPT", self.show_pb(backup_dir, as_text=True)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_no_control_file(self): + """backup.control doesn't exist""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + + # delete backup.control file + file = os.path.join( + backup_dir, "backups", "node", + backup_id, "backup.control") + os.remove(file) + + output = self.show_pb(backup_dir, 'node', as_text=True, as_json=False) + + self.assertIn( + 'Control file', + output) + + self.assertIn( + 'doesn\'t exist', + output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_empty_control_file(self): + """backup.control is empty""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + + # truncate backup.control file + file = os.path.join( + backup_dir, "backups", "node", + backup_id, "backup.control") + fd = open(file, 'w') + fd.close() + + output = self.show_pb(backup_dir, 'node', as_text=True, as_json=False) + + self.assertIn( + 'Control file', + output) + + self.assertIn( + 'is empty', + output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_corrupt_control_file(self): + """backup.control contains invalid option""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + + # corrupt backup.control file + file = os.path.join( + backup_dir, "backups", "node", + backup_id, "backup.control") + fd = open(file, 'a') + fd.write("statuss = OK") + fd.close() + + self.assertIn( + 'WARNING: Invalid option "statuss" in file', + self.show_pb(backup_dir, 'node', as_json=False, as_text=True)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_corrupt_correctness(self): + """backup.control contains invalid option""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + 
pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=1) + + # FULL + backup_local_id = self.backup_node( + backup_dir, 'node', node, no_remote=True) + + output_local = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_local_id) + + if self.remote: + backup_remote_id = self.backup_node(backup_dir, 'node', node) + else: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, + options=['--remote-proto=ssh', '--remote-host=localhost']) + + output_remote = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_remote_id) + + # check correctness + self.assertEqual( + output_local['data-bytes'], + output_remote['data-bytes']) + + self.assertEqual( + output_local['uncompressed-bytes'], + output_remote['uncompressed-bytes']) + + # DELTA + backup_local_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', no_remote=True) + + output_local = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_local_id) + self.delete_pb(backup_dir, 'node', backup_local_id) + + if self.remote: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + else: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', + options=['--remote-proto=ssh', '--remote-host=localhost']) + + output_remote = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_remote_id) + self.delete_pb(backup_dir, 'node', backup_remote_id) + + # check correctness + self.assertEqual( + output_local['data-bytes'], + output_remote['data-bytes']) + + self.assertEqual( + output_local['uncompressed-bytes'], + output_remote['uncompressed-bytes']) + + # PAGE + backup_local_id = self.backup_node( + backup_dir, 'node', node, + backup_type='page', no_remote=True) + + output_local = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_local_id) + self.delete_pb(backup_dir, 'node', backup_local_id) + + if self.remote: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + else: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='page', + options=['--remote-proto=ssh', '--remote-host=localhost']) + + output_remote = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_remote_id) + self.delete_pb(backup_dir, 'node', backup_remote_id) + + # check correctness + self.assertEqual( + output_local['data-bytes'], + output_remote['data-bytes']) + + self.assertEqual( + output_local['uncompressed-bytes'], + output_remote['uncompressed-bytes']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_corrupt_correctness_1(self): + """backup.control contains invalid option""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=1) + + # FULL + backup_local_id = self.backup_node( + backup_dir, 'node', node, no_remote=True) + + output_local = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_local_id) + + if 
self.remote: + backup_remote_id = self.backup_node(backup_dir, 'node', node) + else: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, + options=['--remote-proto=ssh', '--remote-host=localhost']) + + output_remote = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_remote_id) + + # check correctness + self.assertEqual( + output_local['data-bytes'], + output_remote['data-bytes']) + + self.assertEqual( + output_local['uncompressed-bytes'], + output_remote['uncompressed-bytes']) + + # change data + pgbench = node.pgbench(options=['-T', '10', '--no-vacuum']) + pgbench.wait() + + # DELTA + backup_local_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', no_remote=True) + + output_local = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_local_id) + self.delete_pb(backup_dir, 'node', backup_local_id) + + if self.remote: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + else: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', + options=['--remote-proto=ssh', '--remote-host=localhost']) + + output_remote = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_remote_id) + self.delete_pb(backup_dir, 'node', backup_remote_id) + + # check correctness + self.assertEqual( + output_local['data-bytes'], + output_remote['data-bytes']) + + self.assertEqual( + output_local['uncompressed-bytes'], + output_remote['uncompressed-bytes']) + + # PAGE + backup_local_id = self.backup_node( + backup_dir, 'node', node, + backup_type='page', no_remote=True) + + output_local = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_local_id) + self.delete_pb(backup_dir, 'node', backup_local_id) + + if self.remote: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + else: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='page', + options=['--remote-proto=ssh', '--remote-host=localhost']) + + output_remote = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_remote_id) + self.delete_pb(backup_dir, 'node', backup_remote_id) + + # check correctness + self.assertEqual( + output_local['data-bytes'], + output_remote['data-bytes']) + + self.assertEqual( + output_local['uncompressed-bytes'], + output_remote['uncompressed-bytes']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_corrupt_correctness_2(self): + """backup.control contains invalid option""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=1) + + # FULL + backup_local_id = self.backup_node( + backup_dir, 'node', node, + options=['--compress'], no_remote=True) + + output_local = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_local_id) + + if self.remote: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, options=['--compress']) + else: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, + options=['--remote-proto=ssh', '--remote-host=localhost', '--compress']) + + output_remote = self.show_pb( 
+ backup_dir, 'node', as_json=False, backup_id=backup_remote_id) + + # check correctness + self.assertEqual( + output_local['data-bytes'], + output_remote['data-bytes']) + + self.assertEqual( + output_local['uncompressed-bytes'], + output_remote['uncompressed-bytes']) + + # change data + pgbench = node.pgbench(options=['-T', '10', '--no-vacuum']) + pgbench.wait() + + # DELTA + backup_local_id = self.backup_node( + backup_dir, 'node', node, + backup_type='delta', options=['--compress'], no_remote=True) + + output_local = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_local_id) + self.delete_pb(backup_dir, 'node', backup_local_id) + + if self.remote: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', options=['--compress']) + else: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta', + options=['--remote-proto=ssh', '--remote-host=localhost', '--compress']) + + output_remote = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_remote_id) + self.delete_pb(backup_dir, 'node', backup_remote_id) + + # check correctness + self.assertEqual( + output_local['data-bytes'], + output_remote['data-bytes']) + + self.assertEqual( + output_local['uncompressed-bytes'], + output_remote['uncompressed-bytes']) + + # PAGE + backup_local_id = self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=['--compress'], no_remote=True) + + output_local = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_local_id) + self.delete_pb(backup_dir, 'node', backup_local_id) + + if self.remote: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='page', options=['--compress']) + else: + backup_remote_id = self.backup_node( + backup_dir, 'node', node, backup_type='page', + options=['--remote-proto=ssh', '--remote-host=localhost', '--compress']) + + output_remote = self.show_pb( + backup_dir, 'node', as_json=False, backup_id=backup_remote_id) + self.delete_pb(backup_dir, 'node', backup_remote_id) + + # check correctness + self.assertEqual( + output_local['data-bytes'], + output_remote['data-bytes']) + + self.assertEqual( + output_local['uncompressed-bytes'], + output_remote['uncompressed-bytes']) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/show_test.py b/tests/show_test.py deleted file mode 100644 index 931da1844..000000000 --- a/tests/show_test.py +++ /dev/null @@ -1,203 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, ProbackupException - - -module_name = 'show' - - -class OptionTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_show_1(self): - """Status DONE and OK""" - fname = self.id().split('.')[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - self.assertEqual( - self.backup_node( - backup_dir, 'node', node, - options=["--log-level-console=panic"]), - None - ) - self.assertIn("OK", self.show_pb(backup_dir, 'node', as_text=True)) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_show_json(self): - 
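Each test_corrupt_correctness variant repeats the same check for FULL, DELTA and PAGE backups: one backup is taken locally (no_remote=True), another over --remote-proto=ssh --remote-host=localhost, and the data-bytes and uncompressed-bytes values reported by show must be identical, so remote mode must not change what ends up in the catalog. A minimal sketch of that comparison, using the field names from the tests (the helper name is illustrative only):

def sizes_match(local_info, remote_info,
                fields=('data-bytes', 'uncompressed-bytes')):
    """Compare the size fields of two show_pb() results (sketch only)."""
    return all(local_info[f] == remote_info[f] for f in fields)

# e.g. sizes_match(output_local, output_remote) is the condition the
# assertEqual pairs above spell out field by field.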
"""Status DONE and OK""" - fname = self.id().split('.')[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - self.assertEqual( - self.backup_node( - backup_dir, 'node', node, - options=["--log-level-console=panic"]), - None - ) - self.backup_node(backup_dir, 'node', node) - self.assertIn("OK", self.show_pb(backup_dir, 'node', as_text=True)) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_corrupt_2(self): - """Status CORRUPT""" - fname = self.id().split('.')[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node(backup_dir, 'node', node) - - # delete file which belong to backup - file = os.path.join( - backup_dir, "backups", "node", - backup_id, "database", "postgresql.conf") - os.remove(file) - - try: - self.validate_pb(backup_dir, 'node', backup_id) - # we should die here because exception is what we expect to happen - self.assertEqual( - 1, 0, - "Expecting Error because backup corrupted.\n" - " Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd - ) - ) - except ProbackupException as e: - self.assertIn( - 'data files are corrupted\n', - e.message, - '\n Unexpected Error Message: {0}\n' - ' CMD: {1}'.format(repr(e.message), self.cmd) - ) - self.assertIn("CORRUPT", self.show_pb(backup_dir, as_text=True)) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_no_control_file(self): - """backup.control doesn't exist""" - fname = self.id().split('.')[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node(backup_dir, 'node', node) - - # delete backup.control file - file = os.path.join( - backup_dir, "backups", "node", - backup_id, "backup.control") - os.remove(file) - - self.assertIn('control file "{0}" doesn\'t exist'.format(file), self.show_pb(backup_dir, 'node', as_text=True)) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_empty_control_file(self): - """backup.control is empty""" - fname = self.id().split('.')[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node(backup_dir, 'node', node) - - # truncate backup.control file - file = 
os.path.join( - backup_dir, "backups", "node", - backup_id, "backup.control") - fd = open(file, 'w') - fd.close() - - self.assertIn('control file "{0}" is empty'.format(file), self.show_pb(backup_dir, 'node', as_text=True)) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_corrupt_control_file(self): - """backup.control contains invalid option""" - fname = self.id().split('.')[3] - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node(backup_dir, 'node', node) - - # corrupt backup.control file - file = os.path.join( - backup_dir, "backups", "node", - backup_id, "backup.control") - fd = open(file, 'a') - fd.write("statuss = OK") - fd.close() - - self.assertIn('invalid option "statuss" in file'.format(file), self.show_pb(backup_dir, 'node', as_text=True)) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/snapfs.py b/tests/snapfs.py new file mode 100644 index 000000000..a7f926c4c --- /dev/null +++ b/tests/snapfs.py @@ -0,0 +1,60 @@ +import unittest +import os +from time import sleep +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException + + +module_name = 'snapfs' + + +class SnapFSTest(ProbackupTest, unittest.TestCase): + + # @unittest.expectedFailure + @unittest.skipUnless(ProbackupTest.enterprise, 'skip') + def test_snapfs_simple(self): + """standart backup modes with ARCHIVE WAL method""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + 'postgres', + 'select pg_make_snapshot()') + + node.pgbench_init(scale=10) + + pgbench = node.pgbench(options=['-T', '50', '-c', '2', '--no-vacuum']) + pgbench.wait() + + self.backup_node( + backup_dir, 'node', node, backup_type='page') + + node.safe_psql( + 'postgres', + 'select pg_remove_snapshot(1)') + + self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgdata = self.pgdata_content(node.data_dir) + + node.cleanup() + + self.restore_node( + backup_dir, 'node', + node, options=["-j", "4"]) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/time_stamp.py b/tests/time_stamp.py new file mode 100644 index 000000000..8abd55a2b --- /dev/null +++ b/tests/time_stamp.py @@ -0,0 +1,74 @@ +import os +import unittest +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException + + +module_name = 'time_stamp' + +class CheckTimeStamp(ProbackupTest, unittest.TestCase): + + def test_start_time_format(self): + """Test backup ID changing after start-time editing in backup.control. 
+ We should convert local time in UTC format""" + # Create simple node + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir="{0}/{1}/node".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums']) + + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.start() + + backup_id = self.backup_node(backup_dir, 'node', node, options=['--stream', '-j 2']) + show_backup = self.show_pb(backup_dir, 'node') + + i = 0 + while i < 2: + with open(os.path.join(backup_dir, "backups", "node", backup_id, "backup.control"), "r+") as f: + output = "" + for line in f: + if line.startswith('start-time') is True: + if i == 0: + output = output + str(line[:-5])+'+00\''+'\n' + else: + output = output + str(line[:-5]) + '\'' + '\n' + else: + output = output + str(line) + f.close() + + with open(os.path.join(backup_dir, "backups", "node", backup_id, "backup.control"), "w") as fw: + fw.write(output) + fw.flush() + show_backup = show_backup + self.show_pb(backup_dir, 'node') + i += 1 + + self.assertTrue(show_backup[1]['id'] == show_backup[2]['id'], "ERROR: Localtime format using instead of UTC") + + node.stop() + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_server_date_style(self): + """Issue #112""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir="{0}/{1}/node".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={"datestyle": "GERMAN, DMY"}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.start() + + self.backup_node( + backup_dir, 'node', node, options=['--stream', '-j 2']) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/validate.py b/tests/validate.py new file mode 100644 index 000000000..c84ea5294 --- /dev/null +++ b/tests/validate.py @@ -0,0 +1,3979 @@ +import os +import unittest +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from datetime import datetime, timedelta +import subprocess +from sys import exit +import time +import hashlib + + +module_name = 'validate' + + +class ValidateTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_basic_validate_nullified_heap_page_backup(self): + """ + make node with nullified heap block + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + file_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('pgbench_accounts')").rstrip() + + node.safe_psql( + "postgres", + "CHECKPOINT") + + # Nullify some block in PostgreSQL + file = os.path.join(node.data_dir, file_path) + with open(file, 'r+b') as f: + f.seek(8192) + f.write(b"\x00"*8192) + f.flush() + f.close + + self.backup_node( + backup_dir, 'node', node, options=['--log-level-file=verbose']) + + pgdata = self.pgdata_content(node.data_dir) + + if not self.remote: + log_file_path = os.path.join(backup_dir, "log", "pg_probackup.log") 
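The nullified-page test above seeks to offset 8192 in the pgbench_accounts relation file and writes 8192 zero bytes, which wipes exactly block 1 under PostgreSQL's default 8 KiB block size; that is the block the expected log line ("blknum 1, empty page") refers to. A minimal sketch of the offset arithmetic, assuming the default BLCKSZ of 8192:

BLCKSZ = 8192  # default PostgreSQL block size, matching the offsets used above

def zero_block(relfile_path, blkno):
    """Overwrite one block of a relation file with zeros (blkno=1 in the test)."""
    with open(relfile_path, 'r+b') as f:
        f.seek(blkno * BLCKSZ)
        f.write(b'\x00' * BLCKSZ)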
+ with open(log_file_path) as f: + log_content = f.read() + self.assertIn( + 'File: "{0}" blknum 1, empty page'.format(file), + log_content, + 'Failed to detect nullified block') + + self.validate_pb(backup_dir, options=["-j", "4"]) + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_validate_wal_unreal_values(self): + """ + make node with archiving, make archive backup + validate to both real and unreal values + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + with node.connect("postgres") as con: + con.execute("CREATE TABLE tbl0005 (a text)") + con.commit() + + backup_id = self.backup_node(backup_dir, 'node', node) + + node.pgbench_init(scale=3) + + target_time = self.show_pb( + backup_dir, 'node', backup_id)['recovery-time'] + after_backup_time = datetime.now().replace(second=0, microsecond=0) + + # Validate to real time + self.assertIn( + "INFO: Backup validation completed successfully", + self.validate_pb( + backup_dir, 'node', + options=["--time={0}".format(target_time), "-j", "4"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + # Validate to unreal time + unreal_time_1 = after_backup_time - timedelta(days=2) + try: + self.validate_pb( + backup_dir, 'node', options=["--time={0}".format( + unreal_time_1), "-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of validation to unreal time.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Backup satisfying target options is not found', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Validate to unreal time #2 + unreal_time_2 = after_backup_time + timedelta(days=2) + try: + self.validate_pb( + backup_dir, 'node', + options=["--time={0}".format(unreal_time_2), "-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of validation to unreal time.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: Not enough WAL records to time' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Validate to real xid + target_xid = None + with node.connect("postgres") as con: + res = con.execute( + "INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") + con.commit() + target_xid = res[0][0] + self.switch_wal_segment(node) + time.sleep(5) + + self.assertIn( + "INFO: Backup validation completed successfully", + self.validate_pb( + backup_dir, 'node', options=["--xid={0}".format(target_xid), + "-j", "4"]), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + # Validate to unreal xid + unreal_xid = int(target_xid) + 1000 + try: + self.validate_pb( + backup_dir, 'node', options=["--xid={0}".format(unreal_xid), + "-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error 
because of validation to unreal xid.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: Not enough WAL records to xid' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Validate with backup ID + output = self.validate_pb(backup_dir, 'node', backup_id, + options=["-j", "4"]) + self.assertIn( + "INFO: Validating backup {0}".format(backup_id), + output, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + self.assertIn( + "INFO: Backup {0} data files are valid".format(backup_id), + output, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + self.assertIn( + "INFO: Backup {0} WAL segments are valid".format(backup_id), + output, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + self.assertIn( + "INFO: Backup {0} is valid".format(backup_id), + output, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + self.assertIn( + "INFO: Validate of backup {0} completed".format(backup_id), + output, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_basic_validate_corrupted_intermediate_backup(self): + """ + make archive node, take FULL, PAGE1, PAGE2 backups, + corrupt file in PAGE1 backup, + run validate on PAGE1, expect PAGE1 to gain status CORRUPT + and PAGE2 gain status ORPHAN + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + file_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + # PAGE1 + backup_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(10000,20000) i") + # PAGE2 + backup_id_3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Corrupt some file + file = os.path.join( + backup_dir, 'backups', 'node', + backup_id_2, 'database', file_path) + with open(file, "r+b", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + # Simple validate + try: + self.validate_pb( + backup_dir, 'node', backup_id=backup_id_2, options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data files corruption.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Validating parents for backup {0}'.format( + backup_id_2) in e.message and + 'ERROR: Backup {0} is corrupt'.format( + backup_id_2) in e.message and + 'WARNING: Backup {0} data files are corrupted'.format( + backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + 
repr(e.message), self.cmd)) + + self.assertEqual( + 'CORRUPT', + self.show_pb(backup_dir, 'node', backup_id_2)['status'], + 'Backup STATUS should be "CORRUPT"') + self.assertEqual( + 'ORPHAN', + self.show_pb(backup_dir, 'node', backup_id_3)['status'], + 'Backup STATUS should be "ORPHAN"') + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.skip("skip") + def test_validate_corrupted_intermediate_backups(self): + """ + make archive node, take FULL, PAGE1, PAGE2 backups, + corrupt file in FULL and PAGE1 backups, run validate on PAGE1, + expect FULL and PAGE1 to gain status CORRUPT and + PAGE2 gain status ORPHAN + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + file_path_t_heap = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + # FULL + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap_1 as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + file_path_t_heap_1 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap_1')").rstrip() + # PAGE1 + backup_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(20000,30000) i") + # PAGE2 + backup_id_3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Corrupt some file in FULL backup + file_full = os.path.join( + backup_dir, 'backups', 'node', + backup_id_1, 'database', file_path_t_heap) + with open(file_full, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + # Corrupt some file in PAGE1 backup + file_page1 = os.path.join( + backup_dir, 'backups', 'node', + backup_id_2, 'database', file_path_t_heap_1) + with open(file_page1, "rb+", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + # Validate PAGE1 + try: + self.validate_pb( + backup_dir, 'node', backup_id=backup_id_2, options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data files corruption.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Validating parents for backup {0}'.format( + backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n ' + 'CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_1) in e.message and + 'WARNING: Invalid CRC of backup file' in e.message and + 'WARNING: Backup {0} data files are corrupted'.format( + backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his parent'.format( + backup_id_2) in e.message and + 'WARNING: Backup {0} is orphaned because his parent'.format( + backup_id_3) in 
e.message and + 'ERROR: Backup {0} is orphan.'.format( + backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'CORRUPT', + self.show_pb(backup_dir, 'node', backup_id_1)['status'], + 'Backup STATUS should be "CORRUPT"') + self.assertEqual( + 'ORPHAN', + self.show_pb(backup_dir, 'node', backup_id_2)['status'], + 'Backup STATUS should be "ORPHAN"') + self.assertEqual( + 'ORPHAN', + self.show_pb(backup_dir, 'node', backup_id_3)['status'], + 'Backup STATUS should be "ORPHAN"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_specific_error_intermediate_backups(self): + """ + make archive node, take FULL, PAGE1, PAGE2 backups, + change backup status of FULL and PAGE1 to ERROR, + run validate on PAGE1 + purpose of this test is to be sure that not only + CORRUPT backup descendants can be orphanized + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + backup_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE2 + backup_id_3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change FULL backup status to ERROR + control_path = os.path.join( + backup_dir, 'backups', 'node', backup_id_1, 'backup.control') + + with open(control_path, 'r') as f: + actual_control = f.read() + + new_control_file = '' + for line in actual_control.splitlines(): + new_control_file += line.replace( + 'status = OK', 'status = ERROR') + new_control_file += '\n' + + with open(control_path, 'wt') as f: + f.write(new_control_file) + f.flush() + f.close() + + # Validate PAGE1 + try: + self.validate_pb( + backup_dir, 'node', backup_id=backup_id_2, options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because backup has status ERROR.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'WARNING: Backup {0} is orphaned because ' + 'his parent {1} has status: ERROR'.format( + backup_id_2, backup_id_1) in e.message and + 'INFO: Validating parents for backup {0}'.format( + backup_id_2) in e.message and + 'WARNING: Backup {0} has status ERROR. 
Skip validation.'.format( + backup_id_1) and + 'ERROR: Backup {0} is orphan.'.format(backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n ' + 'CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node', backup_id_1)['status'], + 'Backup STATUS should be "ERROR"') + self.assertEqual( + 'ORPHAN', + self.show_pb(backup_dir, 'node', backup_id_2)['status'], + 'Backup STATUS should be "ORPHAN"') + self.assertEqual( + 'ORPHAN', + self.show_pb(backup_dir, 'node', backup_id_3)['status'], + 'Backup STATUS should be "ORPHAN"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_error_intermediate_backups(self): + """ + make archive node, take FULL, PAGE1, PAGE2 backups, + change backup status of FULL and PAGE1 to ERROR, + run validate on instance + purpose of this test is to be sure that not only + CORRUPT backup descendants can be orphanized + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + backup_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE2 + backup_id_3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change FULL backup status to ERROR + control_path = os.path.join( + backup_dir, 'backups', 'node', backup_id_1, 'backup.control') + + with open(control_path, 'r') as f: + actual_control = f.read() + + new_control_file = '' + for line in actual_control.splitlines(): + new_control_file += line.replace( + 'status = OK', 'status = ERROR') + new_control_file += '\n' + + with open(control_path, 'wt') as f: + f.write(new_control_file) + f.flush() + f.close() + + # Validate instance + try: + self.validate_pb(backup_dir, options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because backup has status ERROR.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "WARNING: Backup {0} is orphaned because " + "his parent {1} has status: ERROR".format( + backup_id_2, backup_id_1) in e.message and + 'WARNING: Backup {0} has status ERROR. 
Skip validation'.format( + backup_id_1) in e.message and + "WARNING: Some backups are not valid" in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'ERROR', + self.show_pb(backup_dir, 'node', backup_id_1)['status'], + 'Backup STATUS should be "ERROR"') + self.assertEqual( + 'ORPHAN', + self.show_pb(backup_dir, 'node', backup_id_2)['status'], + 'Backup STATUS should be "ORPHAN"') + self.assertEqual( + 'ORPHAN', + self.show_pb(backup_dir, 'node', backup_id_3)['status'], + 'Backup STATUS should be "ORPHAN"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupted_intermediate_backups_1(self): + """ + make archive node, FULL1, PAGE1, PAGE2, PAGE3, PAGE4, PAGE5, FULL2, + corrupt file in PAGE1 and PAGE4, run validate on PAGE3, + expect PAGE1 to gain status CORRUPT, PAGE2, PAGE3, PAGE4 and PAGE5 + to gain status ORPHAN + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL1 + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + backup_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE2 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + file_page_2 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + backup_id_3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE3 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(10000,20000) i") + backup_id_4 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE4 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(20000,30000) i") + backup_id_5 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE5 + node.safe_psql( + "postgres", + "create table t_heap1 as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + file_page_5 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap1')").rstrip() + backup_id_6 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE6 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(30000,40000) i") + backup_id_7 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # FULL2 + backup_id_8 = self.backup_node(backup_dir, 'node', node) + + # Corrupt some file in PAGE2 and PAGE5 backups + file_page1 = os.path.join( + backup_dir, 'backups', 'node', backup_id_3, 'database', file_page_2) + with open(file_page1, "rb+", 0) as f: + 
f.seek(84) + f.write(b"blah") + f.flush() + f.close + + file_page4 = os.path.join( + backup_dir, 'backups', 'node', backup_id_6, 'database', file_page_5) + with open(file_page4, "rb+", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + # Validate PAGE3 + try: + self.validate_pb( + backup_dir, 'node', + backup_id=backup_id_4, options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data files corruption.\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Validating parents for backup {0}'.format( + backup_id_4) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_1) in e.message and + 'INFO: Backup {0} data files are valid'.format( + backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_2) in e.message and + 'INFO: Backup {0} data files are valid'.format( + backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_3) in e.message and + 'WARNING: Invalid CRC of backup file' in e.message and + 'WARNING: Backup {0} data files are corrupted'.format( + backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because ' + 'his parent {1} has status: CORRUPT'.format( + backup_id_4, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because ' + 'his parent {1} has status: CORRUPT'.format( + backup_id_5, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because ' + 'his parent {1} has status: CORRUPT'.format( + backup_id_6, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because ' + 'his parent {1} has status: CORRUPT'.format( + backup_id_7, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'ERROR: Backup {0} is orphan'.format(backup_id_4) in e.message, + '\n Unexpected Error Message: {0}\n ' + 'CMD: {1}'.format(repr(e.message), self.cmd)) + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], + 'Backup STATUS should be "OK"') + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node', backup_id_2)['status'], + 'Backup STATUS should be "OK"') + self.assertEqual( + 'CORRUPT', self.show_pb(backup_dir, 'node', backup_id_3)['status'], + 'Backup STATUS should be "CORRUPT"') + self.assertEqual( + 'ORPHAN', self.show_pb(backup_dir, 'node', backup_id_4)['status'], + 'Backup STATUS should be "ORPHAN"') + self.assertEqual( + 'ORPHAN', self.show_pb(backup_dir, 'node', backup_id_5)['status'], + 'Backup STATUS should be "ORPHAN"') + self.assertEqual( + 'ORPHAN', self.show_pb(backup_dir, 'node', backup_id_6)['status'], + 'Backup STATUS should be "ORPHAN"') + self.assertEqual( + 'ORPHAN', self.show_pb(backup_dir, 'node', backup_id_7)['status'], + 'Backup STATUS 
should be "ORPHAN"') + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node', backup_id_8)['status'], + 'Backup STATUS should be "OK"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_specific_target_corrupted_intermediate_backups(self): + """ + make archive node, take FULL1, PAGE1, PAGE2, PAGE3, PAGE4, PAGE5, FULL2 + corrupt file in PAGE1 and PAGE4, run validate on PAGE3 to specific xid, + expect PAGE1 to gain status CORRUPT, PAGE2, PAGE3, PAGE4 and PAGE5 to + gain status ORPHAN + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL1 + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + backup_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE2 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + file_page_2 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + backup_id_3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE3 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(10000,20000) i") + backup_id_4 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE4 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(20000,30000) i") + + target_xid = node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(30001, 30001) i RETURNING (xmin)").rstrip() + + backup_id_5 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE5 + node.safe_psql( + "postgres", + "create table t_heap1 as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + file_page_5 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap1')").rstrip() + backup_id_6 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE6 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(30000,40000) i") + backup_id_7 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # FULL2 + backup_id_8 = self.backup_node(backup_dir, 'node', node) + + # Corrupt some file in PAGE2 and PAGE5 backups + file_page1 = os.path.join( + backup_dir, 'backups', 'node', + backup_id_3, 'database', file_page_2) + with open(file_page1, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + file_page4 = os.path.join( + backup_dir, 'backups', 'node', + backup_id_6, 'database', file_page_5) + with 
open(file_page4, "rb+", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + # Validate PAGE3 + try: + self.validate_pb( + backup_dir, 'node', + options=[ + '-i', backup_id_4, '--xid={0}'.format(target_xid), "-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data files corruption.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Validating parents for backup {0}'.format( + backup_id_4) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_1) in e.message and + 'INFO: Backup {0} data files are valid'.format( + backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_2) in e.message and + 'INFO: Backup {0} data files are valid'.format( + backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_3) in e.message and + 'WARNING: Invalid CRC of backup file' in e.message and + 'WARNING: Backup {0} data files are corrupted'.format( + backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his ' + 'parent {1} has status: CORRUPT'.format( + backup_id_4, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his ' + 'parent {1} has status: CORRUPT'.format( + backup_id_5, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his ' + 'parent {1} has status: CORRUPT'.format( + backup_id_6, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his ' + 'parent {1} has status: CORRUPT'.format( + backup_id_7, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'ERROR: Backup {0} is orphan'.format( + backup_id_4) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_6)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_7)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_8)['status'], 'Backup STATUS should be "OK"') + + # Clean after yourself + 
self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_instance_with_corrupted_page(self): + """ + make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups, + corrupt file in PAGE1 backup and run validate on instance, + expect PAGE1 to gain status CORRUPT, PAGE2 to gain status ORPHAN + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + # FULL1 + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap1 as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + file_path_t_heap1 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap1')").rstrip() + # PAGE1 + backup_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(20000,30000) i") + # PAGE2 + backup_id_3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + # FULL1 + backup_id_4 = self.backup_node( + backup_dir, 'node', node) + # PAGE3 + backup_id_5 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Corrupt some file in FULL backup + file_full = os.path.join( + backup_dir, 'backups', 'node', backup_id_2, + 'database', file_path_t_heap1) + with open(file_full, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + # Validate Instance + try: + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data files corruption.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "INFO: Validate backups of the instance 'node'" in e.message, + "\n Unexpected Error Message: {0}\n " + "CMD: {1}".format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_5) in e.message and + 'INFO: Backup {0} data files are valid'.format( + backup_id_5) in e.message and + 'INFO: Backup {0} WAL segments are valid'.format( + backup_id_5) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_4) in e.message and + 'INFO: Backup {0} data files are valid'.format( + backup_id_4) in e.message and + 'INFO: Backup {0} WAL segments are valid'.format( + backup_id_4) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_3) in e.message and + 'INFO: Backup {0} data files are valid'.format( + backup_id_3) in e.message and + 'INFO: Backup {0} WAL segments are valid'.format( + backup_id_3) in e.message and + 'WARNING: Backup {0} is orphaned because ' + 'his parent {1} has status: CORRUPT'.format( + backup_id_3, backup_id_2) in e.message, + '\n 
Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_2) in e.message and + 'WARNING: Invalid CRC of backup file' in e.message and + 'WARNING: Backup {0} data files are corrupted'.format( + backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format( + backup_id_1) in e.message and + 'INFO: Backup {0} data files are valid'.format( + backup_id_1) in e.message and + 'INFO: Backup {0} WAL segments are valid'.format( + backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Some backups are not valid' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], + 'Backup STATUS should be "OK"') + self.assertEqual( + 'CORRUPT', self.show_pb(backup_dir, 'node', backup_id_2)['status'], + 'Backup STATUS should be "CORRUPT"') + self.assertEqual( + 'ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], + 'Backup STATUS should be "ORPHAN"') + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], + 'Backup STATUS should be "OK"') + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], + 'Backup STATUS should be "OK"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_instance_with_corrupted_full_and_try_restore(self): + """make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups, + corrupt file in FULL backup and run validate on instance, + expect FULL to gain status CORRUPT, PAGE1 and PAGE2 to gain status ORPHAN, + try to restore backup with --no-validation option""" + fname = self.id().split('.')[3] + node = self.make_simple_node(base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + file_path_t_heap = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + # FULL1 + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + # PAGE1 + backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE2 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(20000,30000) i") + backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # FULL1 + backup_id_4 = self.backup_node(backup_dir, 'node', node) + + # PAGE3 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, " + "md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(30000,40000) i") + backup_id_5 = self.backup_node(backup_dir, 'node', node, 
backup_type='page') + + # Corrupt some file in FULL backup + file_full = os.path.join( + backup_dir, 'backups', 'node', + backup_id_1, 'database', file_path_t_heap) + with open(file_full, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + # Validate Instance + try: + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) + self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_1) in e.message + and "INFO: Validate backups of the instance 'node'" in e.message + and 'WARNING: Invalid CRC of backup file' in e.message + and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"') + + node.cleanup() + restore_out = self.restore_node( + backup_dir, 'node', node, + options=["--no-validate"]) + self.assertIn( + "INFO: Restore of backup {0} completed.".format(backup_id_5), + restore_out, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(self.output), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_instance_with_corrupted_full(self): + """make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups, + corrupt file in FULL backup and run validate on instance, + expect FULL to gain status CORRUPT, PAGE1 and PAGE2 to gain status ORPHAN""" + fname = self.id().split('.')[3] + node = self.make_simple_node(base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select i as id, " + "md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + + file_path_t_heap = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + # FULL1 + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + + # PAGE1 + backup_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGE2 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(20000,30000) i") + + backup_id_3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # FULL1 + backup_id_4 = self.backup_node( + backup_dir, 'node', node) + + # PAGE3 + node.safe_psql( 
+ "postgres", + "insert into t_heap select i as id, " + "md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(30000,40000) i") + backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # Corrupt some file in FULL backup + file_full = os.path.join( + backup_dir, 'backups', 'node', + backup_id_1, 'database', file_path_t_heap) + with open(file_full, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + # Validate Instance + try: + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data files corruption.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_1) in e.message + and "INFO: Validate backups of the instance 'node'" in e.message + and 'WARNING: Invalid CRC of backup file' in e.message + and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupt_wal_1(self): + """make archive node, take FULL1, PAGE1,PAGE2,FULL2,PAGE3,PAGE4 backups, corrupt all wal files, run validate, expect errors""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + with node.connect("postgres") as con: + con.execute("CREATE TABLE tbl0005 (a text)") + con.commit() + + backup_id_2 = self.backup_node(backup_dir, 'node', node) + + # Corrupt WAL + wals_dir = os.path.join(backup_dir, 'wal', 'node') + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] + wals.sort() + for wal in wals: + with open(os.path.join(wals_dir, wal), "rb+", 0) as f: + f.seek(42) + f.write(b"blablablaadssaaaaaaaaaaaaaaa") + f.flush() + f.close + + # Simple validate + try: + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of wal segments corruption.\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'WARNING: Backup' in e.message and + 'WAL segments are corrupted' in e.message and + "WARNING: There are not enough WAL " + "records to consistenly restore backup" in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 
'CORRUPT', + self.show_pb(backup_dir, 'node', backup_id_1)['status'], + 'Backup STATUS should be "CORRUPT"') + self.assertEqual( + 'CORRUPT', + self.show_pb(backup_dir, 'node', backup_id_2)['status'], + 'Backup STATUS should be "CORRUPT"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupt_wal_2(self): + """make archive node, make full backup, corrupt all wal files, run validate to real xid, expect errors""" + fname = self.id().split('.')[3] + node = self.make_simple_node(base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + with node.connect("postgres") as con: + con.execute("CREATE TABLE tbl0005 (a text)") + con.commit() + + backup_id = self.backup_node(backup_dir, 'node', node) + target_xid = None + with node.connect("postgres") as con: + res = con.execute( + "INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") + con.commit() + target_xid = res[0][0] + + # Corrupt WAL + wals_dir = os.path.join(backup_dir, 'wal', 'node') + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] + wals.sort() + for wal in wals: + with open(os.path.join(wals_dir, wal), "rb+", 0) as f: + f.seek(128) + f.write(b"blablablaadssaaaaaaaaaaaaaaa") + f.flush() + f.close + + # Validate to xid + try: + self.validate_pb( + backup_dir, + 'node', + backup_id, + options=[ + "--xid={0}".format(target_xid), "-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of wal segments corruption.\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'WARNING: Backup' in e.message and + 'WAL segments are corrupted' in e.message and + "WARNING: There are not enough WAL " + "records to consistenly restore backup" in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'CORRUPT', + self.show_pb(backup_dir, 'node', backup_id)['status'], + 'Backup STATUS should be "CORRUPT"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_wal_lost_segment_1(self): + """make archive node, make archive full backup, + delete from archive wal segment which belong to previous backup + run validate, expecting error because of missing wal segment + make sure that backup status is 'CORRUPT' + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + backup_id = self.backup_node(backup_dir, 'node', node) + + # Delete wal segment + wals_dir = os.path.join(backup_dir, 'wal', 'node') + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] + wals.sort() + file = os.path.join(backup_dir, 'wal', 'node', wals[-1]) + os.remove(file) + + # cut out '.gz' + if self.archive_compress: + file = file[:-3] + + try: + self.validate_pb(backup_dir, 'node', 
options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of wal segment disappearance.\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "is absent" in e.message and + "WARNING: There are not enough WAL records to consistenly " + "restore backup {0}".format(backup_id) in e.message and + "WARNING: Backup {0} WAL segments are corrupted".format( + backup_id) in e.message and + "WARNING: Some backups are not valid" in e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'CORRUPT', + self.show_pb(backup_dir, 'node', backup_id)['status'], + 'Backup {0} should have STATUS "CORRUPT"') + + # Run validate again + try: + self.validate_pb(backup_dir, 'node', backup_id, options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of backup corruption.\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'INFO: Revalidating backup {0}'.format(backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'ERROR: Backup {0} is corrupt.'.format(backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupt_wal_between_backups(self): + """ + make archive node, make full backup, corrupt all wal files, + run validate to real xid, expect errors + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + + # make some wals + node.pgbench_init(scale=3) + + with node.connect("postgres") as con: + con.execute("CREATE TABLE tbl0005 (a text)") + con.commit() + + with node.connect("postgres") as con: + res = con.execute( + "INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") + con.commit() + target_xid = res[0][0] + + if self.get_version(node) < self.version_to_num('10.0'): + walfile = node.safe_psql( + 'postgres', + 'select pg_xlogfile_name(pg_current_xlog_location())').rstrip() + else: + walfile = node.safe_psql( + 'postgres', + 'select pg_walfile_name(pg_current_wal_lsn())').rstrip() + + if self.archive_compress: + walfile = walfile + '.gz' + self.switch_wal_segment(node) + + # generate some wals + node.pgbench_init(scale=3) + + self.backup_node(backup_dir, 'node', node) + + # Corrupt WAL + wals_dir = os.path.join(backup_dir, 'wal', 'node') + with open(os.path.join(wals_dir, walfile), "rb+", 0) as f: + f.seek(9000) + f.write(b"b") + f.flush() + f.close + + # Validate to xid + try: + self.validate_pb( + backup_dir, + 'node', + backup_id, + options=[ + "--xid={0}".format(target_xid), "-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of wal segments corruption.\n" + " Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: Not enough WAL records to xid' in e.message and + 'WARNING: Recovery can be done up to time' in e.message and + "ERROR: Not enough WAL records to xid 
{0}\n".format( + target_xid), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[0]['status'], + 'Backup STATUS should be "OK"') + + self.assertEqual( + 'OK', + self.show_pb(backup_dir, 'node')[1]['status'], + 'Backup STATUS should be "OK"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_pgpro702_688(self): + """ + make node without archiving, make stream backup, + get Recovery Time, validate to Recovery Time + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node( + backup_dir, 'node', node, options=["--stream"]) + recovery_time = self.show_pb( + backup_dir, 'node', backup_id=backup_id)['recovery-time'] + + try: + self.validate_pb( + backup_dir, 'node', + options=["--time={0}".format(recovery_time), "-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of wal segment disappearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WAL archive is empty. You cannot restore backup to a ' + 'recovery target without WAL archive', e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_pgpro688(self): + """ + make node with archiving, make backup, get Recovery Time, + validate to Recovery Time. Waiting PGPRO-688. 
RESOLVED + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + recovery_time = self.show_pb( + backup_dir, 'node', backup_id)['recovery-time'] + + self.validate_pb( + backup_dir, 'node', options=["--time={0}".format(recovery_time), + "-j", "4"]) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_pgpro561(self): + """ + make node with archiving, make stream backup, + restore it to node1, check that archiving is not successful on node1 + """ + fname = self.id().split('.')[3] + node1 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node1'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node1', node1) + self.set_archiving(backup_dir, 'node1', node1) + node1.slow_start() + + backup_id = self.backup_node( + backup_dir, 'node1', node1, options=["--stream"]) + + node2 = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node2')) + node2.cleanup() + + node1.psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,256) i") + + self.backup_node( + backup_dir, 'node1', node1, + backup_type='page', options=["--stream"]) + self.restore_node(backup_dir, 'node1', data_dir=node2.data_dir) + + self.set_auto_conf( + node2, {'port': node2.port, 'archive_mode': 'off'}) + + node2.slow_start() + + self.set_auto_conf( + node2, {'archive_mode': 'on'}) + + node2.stop() + node2.slow_start() + + timeline_node1 = node1.get_control_data()["Latest checkpoint's TimeLineID"] + timeline_node2 = node2.get_control_data()["Latest checkpoint's TimeLineID"] + self.assertEqual( + timeline_node1, timeline_node2, + "Timelines on Master and Node1 should be equal. " + "This is unexpected") + + archive_command_node1 = node1.safe_psql( + "postgres", "show archive_command") + archive_command_node2 = node2.safe_psql( + "postgres", "show archive_command") + self.assertEqual( + archive_command_node1, archive_command_node2, + "Archive command on Master and Node should be equal. 
" + "This is unexpected") + + # result = node2.safe_psql("postgres", "select last_failed_wal from pg_stat_get_archiver() where last_failed_wal is not NULL") + ## self.assertEqual(res, six.b(""), 'Restored Node1 failed to archive segment {0} due to having the same archive command as Master'.format(res.rstrip())) + # if result == "": + # self.assertEqual(1, 0, 'Error is expected due to Master and Node1 having the common archive and archive_command') + + node1.psql( + "postgres", + "create table t_heap_1 as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10) i") + + self.switch_wal_segment(node1) + +# wals_dir = os.path.join(backup_dir, 'wal', 'node1') +# wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join( +# wals_dir, f)) and not f.endswith('.backup') and not f.endswith('.part')] +# wals = map(str, wals) +# print(wals) + + self.switch_wal_segment(node2) + +# wals_dir = os.path.join(backup_dir, 'wal', 'node1') +# wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join( +# wals_dir, f)) and not f.endswith('.backup') and not f.endswith('.part')] +# wals = map(str, wals) +# print(wals) + + time.sleep(5) + + log_file = os.path.join(node2.logs_dir, 'postgresql.log') + with open(log_file, 'r') as f: + log_content = f.read() + self.assertTrue( + 'LOG: archive command failed with exit code 1' in log_content and + 'DETAIL: The failed archive command was:' in log_content and + 'WAL file already exists in archive with different checksum' in log_content, + 'Expecting error messages about failed archive_command' + ) + self.assertFalse( + 'pg_probackup archive-push completed successfully' in log_content) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupted_full(self): + """ + make node with archiving, take full backup, and three page backups, + take another full backup and three page backups + corrupt second full backup, run validate, check that + second full backup became CORRUPT and his page backups are ORPHANs + remove corruption and run valudate again, check that + second full backup and his page backups are OK + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'checkpoint_timeout': '30'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + backup_id = self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + node.safe_psql( + "postgres", + "alter system set archive_command = 'false'") + node.reload() + try: + self.backup_node( + backup_dir, 'node', node, + backup_type='page', options=['--archive-timeout=1s']) + self.assertEqual( + 1, 0, + "Expecting Error because of data file dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + pass + + self.assertTrue( + self.show_pb(backup_dir, 'node')[6]['status'] == 'ERROR') + self.set_archiving(backup_dir, 'node', node) + 
node.reload() + self.backup_node(backup_dir, 'node', node, backup_type='page') + + file = os.path.join( + backup_dir, 'backups', 'node', + backup_id, 'database', 'postgresql.auto.conf') + + file_new = os.path.join(backup_dir, 'postgresql.auto.conf') + os.rename(file, file_new) + + try: + self.validate_pb(backup_dir, options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data file dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'Validating backup {0}'.format(backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} data files are corrupted'.format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Some backups are not valid'.format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue( + self.show_pb(backup_dir, 'node')[3]['status'] == 'CORRUPT') + self.assertTrue( + self.show_pb(backup_dir, 'node')[4]['status'] == 'ORPHAN') + self.assertTrue( + self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + self.assertTrue( + self.show_pb(backup_dir, 'node')[6]['status'] == 'ERROR') + self.assertTrue( + self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + + os.rename(file_new, file) + try: + self.validate_pb(backup_dir, options=["-j", "4"]) + except ProbackupException as e: + self.assertIn( + 'WARNING: Some backups are not valid'.format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'OK') + self.assertTrue( + self.show_pb(backup_dir, 'node')[6]['status'] == 'ERROR') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'OK') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupted_full_1(self): + """ + make node with archiving, take full backup, and three page backups, + take another full backup and four page backups + corrupt second full backup, run validate, check that + second full backup became CORRUPT and his page backups are ORPHANs + remove corruption from full backup and corrupt his second page backup + run valudate again, check that + second full backup and his firts page backups are OK, + second page should be CORRUPT + third page should be ORPHAN + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + 
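+        # Start the node and build two chains: FULL1 with two PAGE backups, then FULL2 (backup_id, corrupted below) with three PAGE backups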
node.slow_start() + + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + backup_id = self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + backup_id_page = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + file = os.path.join( + backup_dir, 'backups', 'node', + backup_id, 'database', 'postgresql.auto.conf') + + file_new = os.path.join(backup_dir, 'postgresql.auto.conf') + os.rename(file, file_new) + + try: + self.validate_pb(backup_dir, options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data file dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'Validating backup {0}'.format(backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} data files are corrupted'.format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Some backups are not valid'.format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'CORRUPT') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + os.rename(file_new, file) + + file = os.path.join( + backup_dir, 'backups', 'node', + backup_id_page, 'database', 'backup_label') + + file_new = os.path.join(backup_dir, 'backup_label') + os.rename(file, file_new) + + try: + self.validate_pb(backup_dir, options=["-j", "4"]) + except ProbackupException as e: + self.assertIn( + 'WARNING: Some backups are not valid'.format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'CORRUPT') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupted_full_2(self): + """ + PAGE2_2b + PAGE2_2a + PAGE2_4 + PAGE2_4 <- validate + PAGE2_3 + PAGE2_2 <- CORRUPT + PAGE2_1 + FULL2 + PAGE1_1 + FULL1 + corrupt second page backup, run validate on PAGE2_3, check that + PAGE2_2 became CORRUPT and his descendants are ORPHANs, + take two more PAGE backups, which now trace their origin + to PAGE2_1 - latest OK backup, + run validate on PAGE2_3, check that PAGE2_2a and PAGE2_2b are OK, + + remove corruption from 
PAGE2_2 and run validate on PAGE2_4 + """ + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + corrupt_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + validate_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + file = os.path.join( + backup_dir, 'backups', 'node', + corrupt_id, 'database', 'backup_label') + + file_new = os.path.join(backup_dir, 'backup_label') + os.rename(file, file_new) + + try: + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data file dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'INFO: Validating parents for backup {0}'.format(validate_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'INFO: Validating backup {0}'.format( + self.show_pb(backup_dir, 'node')[2]['id']), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'INFO: Validating backup {0}'.format( + self.show_pb(backup_dir, 'node')[3]['id']), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'INFO: Validating backup {0}'.format( + corrupt_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} data files are corrupted'.format( + corrupt_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'CORRUPT') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + # THIS IS GOLD!!!! 
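+        # Take two more PAGE backups; with PAGE2_2 marked CORRUPT they should chain from PAGE2_1, the latest valid backup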
+ self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + try: + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data file dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'Backup {0} data files are valid'.format( + self.show_pb(backup_dir, 'node')[9]['id']), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'Backup {0} data files are valid'.format( + self.show_pb(backup_dir, 'node')[8]['id']), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Backup {0} has parent {1} with status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[7]['id'], corrupt_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Backup {0} has parent {1} with status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[6]['id'], corrupt_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Backup {0} has parent {1} with status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[5]['id'], corrupt_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'INFO: Revalidating backup {0}'.format( + corrupt_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Some backups are not valid', e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'CORRUPT') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + # revalidate again + + try: + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data file dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Backup {0} has status: ORPHAN'.format(validate_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'Backup {0} has parent {1} with status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[7]['id'], corrupt_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'Backup {0} has parent {1} with status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[6]['id'], corrupt_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + 
self.assertIn( + 'Backup {0} has parent {1} with status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[5]['id'], corrupt_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'INFO: Validating parents for backup {0}'.format( + validate_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'INFO: Validating backup {0}'.format( + self.show_pb(backup_dir, 'node')[2]['id']), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'INFO: Validating backup {0}'.format( + self.show_pb(backup_dir, 'node')[3]['id']), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'INFO: Revalidating backup {0}'.format( + corrupt_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Backup {0} data files are corrupted'.format( + corrupt_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'ERROR: Backup {0} is orphan.'.format( + validate_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Fix CORRUPT + os.rename(file_new, file) + + output = self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) + + self.assertIn( + 'WARNING: Backup {0} has status: ORPHAN'.format(validate_id), + output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'Backup {0} has parent {1} with status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[7]['id'], corrupt_id), + output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'Backup {0} has parent {1} with status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[6]['id'], corrupt_id), + output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'Backup {0} has parent {1} with status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[5]['id'], corrupt_id), + output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'INFO: Validating parents for backup {0}'.format( + validate_id), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'INFO: Validating backup {0}'.format( + self.show_pb(backup_dir, 'node')[2]['id']), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'INFO: Validating backup {0}'.format( + self.show_pb(backup_dir, 'node')[3]['id']), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'INFO: Revalidating backup {0}'.format( + corrupt_id), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'Backup {0} data files are valid'.format( + corrupt_id), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'INFO: Revalidating backup {0}'.format( + self.show_pb(backup_dir, 'node')[5]['id']), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'Backup {0} data files are valid'.format( + self.show_pb(backup_dir, 'node')[5]['id']), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'INFO: Revalidating backup {0}'.format( + validate_id), output, + '\n 
Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'Backup {0} data files are valid'.format( + validate_id), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'INFO: Backup {0} WAL segments are valid'.format( + validate_id), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'INFO: Backup {0} is valid.'.format( + validate_id), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'INFO: Validate of backup {0} completed.'.format( + validate_id), output, + '\n Unexpected Output Message: {0}\n'.format( + repr(output))) + + # Now we have two perfectly valid backup chains based on FULL2 + + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupted_full_missing(self): + """ + make node with archiving, take full backup, and three page backups, + take another full backup and four page backups + corrupt second full backup, run validate, check that + second full backup became CORRUPT and his page backups are ORPHANs + remove corruption from full backup and remove his second page backup + run valudate again, check that + second full backup and his firts page backups are OK, + third page should be ORPHAN + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + backup_id = self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + backup_id_page = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + file = os.path.join( + backup_dir, 'backups', 'node', + backup_id, 'database', 'postgresql.auto.conf') + + file_new = os.path.join(backup_dir, 'postgresql.auto.conf') + os.rename(file, file_new) + + try: + self.validate_pb(backup_dir, options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of data file dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'Validating backup {0}'.format(backup_id), 
e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} data files are corrupted'.format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} has status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[5]['id'], backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'CORRUPT') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + # Full backup is fixed + os.rename(file_new, file) + + # break PAGE + old_directory = os.path.join( + backup_dir, 'backups', 'node', backup_id_page) + new_directory = os.path.join(backup_dir, backup_id_page) + os.rename(old_directory, new_directory) + + try: + self.validate_pb(backup_dir, options=["-j", "4"]) + except ProbackupException as e: + self.assertIn( + 'WARNING: Some backups are not valid', e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[7]['id'], + backup_id_page), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[6]['id'], + backup_id_page), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Backup {0} has parent {1} with status: CORRUPT'.format( + self.show_pb(backup_dir, 'node')[5]['id'], backup_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + # missing backup is here + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + # validate should be idempotent - user running validate + # second time must be provided with ID of missing backup + + try: + self.validate_pb(backup_dir, options=["-j", "4"]) + except ProbackupException as e: + self.assertIn( + 'WARNING: Some backups are not valid', e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[7]['id'], + 
backup_id_page), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[6]['id'], + backup_id_page), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + # missing backup is here + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + # fix missing PAGE backup + os.rename(new_directory, old_directory) + # exit(1) + + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + output = self.validate_pb(backup_dir, options=["-j", "4"]) + + self.assertIn( + 'INFO: All backups are valid', + output, + '\n Unexpected Error Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'WARNING: Backup {0} has parent {1} with status: ORPHAN'.format( + self.show_pb(backup_dir, 'node')[8]['id'], + self.show_pb(backup_dir, 'node')[6]['id']), + output, + '\n Unexpected Error Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'WARNING: Backup {0} has parent {1} with status: ORPHAN'.format( + self.show_pb(backup_dir, 'node')[7]['id'], + self.show_pb(backup_dir, 'node')[6]['id']), + output, + '\n Unexpected Error Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'Revalidating backup {0}'.format( + self.show_pb(backup_dir, 'node')[6]['id']), + output, + '\n Unexpected Error Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'Revalidating backup {0}'.format( + self.show_pb(backup_dir, 'node')[7]['id']), + output, + '\n Unexpected Error Message: {0}\n'.format( + repr(output))) + + self.assertIn( + 'Revalidating backup {0}'.format( + self.show_pb(backup_dir, 'node')[8]['id']), + output, + '\n Unexpected Error Message: {0}\n'.format( + repr(output))) + + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + 
self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_file_size_corruption_no_validate(self): + + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + # initdb_params=['--data-checksums'], + ) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,1000) i") + node.safe_psql( + "postgres", + "CHECKPOINT;") + + heap_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + heap_size = node.safe_psql( + "postgres", + "select pg_relation_size('t_heap')") + + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4"], asynchronous=False, gdb=False) + + node.stop() + node.cleanup() + + # Let`s do file corruption + with open( + os.path.join( + backup_dir, "backups", 'node', backup_id, + "database", heap_path), "rb+", 0) as f: + f.truncate(int(heap_size) - 4096) + f.flush() + f.close + + node.cleanup() + + try: + self.restore_node( + backup_dir, 'node', node, + options=["--no-validate"]) + except ProbackupException as e: + self.assertTrue( + "ERROR: Backup files restoring failed" in e.message, + repr(e.message)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_specific_backup_with_missing_backup(self): + """ + PAGE3_2 + PAGE3_1 + FULL3 + PAGE2_5 + PAGE2_4 <- validate + PAGE2_3 + PAGE2_2 <- missing + PAGE2_1 + FULL2 + PAGE1_2 + PAGE1_1 + FULL1 + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # CHAIN1 + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # CHAIN2 + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + missing_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + validate_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # CHAIN3 + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + old_directory = os.path.join(backup_dir, 'backups', 'node', missing_id) + new_directory = os.path.join(backup_dir, missing_id) + + os.rename(old_directory, new_directory) + + try: + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of backup dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 
'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[7]['id'], missing_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[6]['id'], missing_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[5]['id'], missing_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[10]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + # missing backup + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + try: + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of backup dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[7]['id'], missing_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[6]['id'], missing_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[5]['id'], missing_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + os.rename(new_directory, old_directory) + + # Revalidate backup chain + self.validate_pb(backup_dir, 'node', validate_id, options=["-j", "4"]) + + self.assertTrue(self.show_pb(backup_dir, 'node')[11]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[10]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 
'OK') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_specific_backup_with_missing_backup_1(self): + """ + PAGE3_2 + PAGE3_1 + FULL3 + PAGE2_5 + PAGE2_4 <- validate + PAGE2_3 + PAGE2_2 <- missing + PAGE2_1 + FULL2 <- missing + PAGE1_2 + PAGE1_1 + FULL1 + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # CHAIN1 + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # CHAIN2 + missing_full_id = self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + missing_page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + validate_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # CHAIN3 + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + page_old_directory = os.path.join( + backup_dir, 'backups', 'node', missing_page_id) + page_new_directory = os.path.join(backup_dir, missing_page_id) + os.rename(page_old_directory, page_new_directory) + + full_old_directory = os.path.join( + backup_dir, 'backups', 'node', missing_full_id) + full_new_directory = os.path.join(backup_dir, missing_full_id) + os.rename(full_old_directory, full_new_directory) + + try: + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of backup dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[6]['id'], missing_page_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[5]['id'], missing_page_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[4]['id'], missing_page_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'ORPHAN') + # PAGE2_1 + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') # <- SHit + # FULL2 + 
self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + os.rename(page_new_directory, page_old_directory) + os.rename(full_new_directory, full_old_directory) + + # Revalidate backup chain + self.validate_pb(backup_dir, 'node', validate_id, options=["-j", "4"]) + + self.assertTrue(self.show_pb(backup_dir, 'node')[11]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[10]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'ORPHAN') # <- Fail + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_with_missing_backup_1(self): + """ + PAGE3_2 + PAGE3_1 + FULL3 + PAGE2_5 + PAGE2_4 <- validate + PAGE2_3 + PAGE2_2 <- missing + PAGE2_1 + FULL2 <- missing + PAGE1_2 + PAGE1_1 + FULL1 + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # CHAIN1 + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # CHAIN2 + missing_full_id = self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + missing_page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + validate_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # CHAIN3 + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # Break PAGE + page_old_directory = os.path.join( + backup_dir, 'backups', 'node', missing_page_id) + page_new_directory = os.path.join(backup_dir, missing_page_id) + os.rename(page_old_directory, page_new_directory) + + # Break FULL + full_old_directory = os.path.join( + backup_dir, 'backups', 'node', missing_full_id) + full_new_directory = os.path.join(backup_dir, missing_full_id) + os.rename(full_old_directory, full_new_directory) + + try: + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of backup dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + 
self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[6]['id'], missing_page_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[5]['id'], missing_page_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[4]['id'], missing_page_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'ORPHAN') + # PAGE2_2 is missing + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + # FULL1 - is missing + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + os.rename(page_new_directory, page_old_directory) + + # Revalidate backup chain + try: + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of backup dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Backup {0} has status: ORPHAN'.format( + validate_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[7]['id'], + missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[6]['id'], + missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[5]['id'], + missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[4]['id'], + missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[3]['id'], + missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[10]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'OK') + 
self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'ORPHAN') + # FULL1 - is missing + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + os.rename(full_new_directory, full_old_directory) + + # Revalidate chain + self.validate_pb(backup_dir, 'node', validate_id, options=["-j", "4"]) + + self.assertTrue(self.show_pb(backup_dir, 'node')[11]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[10]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_with_missing_backup_2(self): + """ + PAGE3_2 + PAGE3_1 + FULL3 + PAGE2_5 + PAGE2_4 + PAGE2_3 + PAGE2_2 <- missing + PAGE2_1 + FULL2 <- missing + PAGE1_2 + PAGE1_1 + FULL1 + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # CHAIN1 + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # CHAIN2 + missing_full_id = self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + missing_page_id = self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # CHAIN3 + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + self.backup_node(backup_dir, 'node', node, backup_type='page') + + page_old_directory = os.path.join(backup_dir, 'backups', 'node', missing_page_id) + page_new_directory = os.path.join(backup_dir, missing_page_id) + os.rename(page_old_directory, page_new_directory) + + full_old_directory = os.path.join(backup_dir, 'backups', 'node', missing_full_id) + full_new_directory = os.path.join(backup_dir, missing_full_id) + os.rename(full_old_directory, full_new_directory) 
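# NOTE (editor's sketch, not part of this diff): the tests above and below simulate a
# "missing" backup by renaming its directory out of <backup_dir>/backups/<instance>
# and renaming it back before revalidation. Assuming only that rename-out/rename-back
# pattern, it could be wrapped in a small reusable helper; the name `hidden_backup`
# and its contextmanager form are illustrative only and do not exist in the suite.
import os
from contextlib import contextmanager

@contextmanager
def hidden_backup(backup_dir, instance, backup_id):
    # Temporarily move <backup_id> out of the instance catalog so that
    # validation sees it as missing, then restore it on exit.
    old = os.path.join(backup_dir, 'backups', instance, backup_id)
    new = os.path.join(backup_dir, backup_id)
    os.rename(old, new)      # backup becomes "missing" for validate
    try:
        yield
    finally:
        os.rename(new, old)  # put the backup back so the chain can be revalidated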
+ + try: + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of backup dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[6]['id'], missing_page_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[5]['id'], missing_page_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[4]['id'], missing_page_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[3]['id'], missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'ORPHAN') + # PAGE2_2 is missing + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'ORPHAN') + # FULL1 - is missing + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + os.rename(page_new_directory, page_old_directory) + + # Revalidate backup chain + try: + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of backup dissapearance.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[7]['id'], missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[6]['id'], missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[5]['id'], missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} is orphaned because his parent {1} is missing'.format( + self.show_pb(backup_dir, 'node')[4]['id'], missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + self.assertIn( + 'WARNING: Backup {0} has missing parent {1}'.format( + self.show_pb(backup_dir, 'node')[3]['id'], missing_full_id), + e.message, + '\n Unexpected Error Message: {0}\n CMD: 
{1}'.format( + repr(e.message), self.cmd)) + + self.assertTrue(self.show_pb(backup_dir, 'node')[10]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[9]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[8]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'ORPHAN') + self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'ORPHAN') + # FULL1 - is missing + self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') + self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_corrupt_pg_control_via_resetxlog(self): + """ PGPRO-2096 """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + + if self.get_version(node) < 100000: + pg_resetxlog_path = self.get_bin_path('pg_resetxlog') + wal_dir = 'pg_xlog' + else: + pg_resetxlog_path = self.get_bin_path('pg_resetwal') + wal_dir = 'pg_wal' + + os.mkdir( + os.path.join( + backup_dir, 'backups', 'node', backup_id, 'database', wal_dir, 'archive_status')) + + pg_control_path = os.path.join( + backup_dir, 'backups', 'node', + backup_id, 'database', 'global', 'pg_control') + + md5_before = hashlib.md5( + open(pg_control_path, 'rb').read()).hexdigest() + + self.run_binary( + [ + pg_resetxlog_path, + '-D', + os.path.join(backup_dir, 'backups', 'node', backup_id, 'database'), + '-o 42', + '-f' + ], + asynchronous=False) + + md5_after = hashlib.md5( + open(pg_control_path, 'rb').read()).hexdigest() + + if self.verbose: + print('\n MD5 BEFORE resetxlog: {0}\n MD5 AFTER resetxlog: {1}'.format( + md5_before, md5_after)) + + # Validate backup + try: + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) + self.assertEqual( + 1, 0, + "Expecting Error because of pg_control change.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'data files are corrupted', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validation_after_backup(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + gdb = self.backup_node( + backup_dir, 'node', node, gdb=True, options=['--stream']) + + gdb.set_breakpoint('pgBackupValidate') + gdb.run_until_break() + + backup_id = self.show_pb(backup_dir, 'node')[0]['id'] + + file = 
os.path.join( + backup_dir, "backups", "node", backup_id, + "database", "postgresql.conf") + os.remove(file) + + gdb.continue_execution_until_exit() + + self.assertEqual( + 'CORRUPT', + self.show_pb(backup_dir, 'node', backup_id)['status'], + 'Backup STATUS should be "ERROR"') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_validate_corrupt_tablespace_map(self): + """ + Check that corruption in tablespace_map is detected + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.create_tblspace_in_node(node, 'external_dir') + + node.safe_psql( + 'postgres', + 'CREATE TABLE t_heap(a int) TABLESPACE "external_dir"') + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + tablespace_map = os.path.join( + backup_dir, 'backups', 'node', + backup_id, 'database', 'tablespace_map') + + # Corrupt tablespace_map file in FULL backup + with open(tablespace_map, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + try: + self.validate_pb(backup_dir, 'node', backup_id=backup_id) + self.assertEqual( + 1, 0, + "Expecting Error because tablespace_map is corrupted.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'WARNING: Invalid CRC of backup file', + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_validate_target_lsn(self): + """ + Check validation to specific LSN + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,10000) i") + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + self.restore_node(backup_dir, 'node', node_restored) + + self.set_auto_conf( + node_restored, {'port': node_restored.port}) + + node_restored.slow_start() + + self.switch_wal_segment(node) + + backup_id = self.backup_node( + backup_dir, 'node', node_restored, + data_dir=node_restored.data_dir) + + target_lsn = self.show_pb(backup_dir, 'node')[1]['stop-lsn'] + + self.delete_pb(backup_dir, 'node', backup_id) + + self.validate_pb( + backup_dir, 'node', + options=[ + '--recovery-target-timeline=2', + '--recovery-target-lsn={0}'.format(target_lsn)]) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_partial_validate_empty_and_mangled_database_map(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = 
os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + + node.slow_start() + + # create databases + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + # FULL backup with database_map + backup_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + pgdata = self.pgdata_content(node.data_dir) + + # truncate database_map + path = os.path.join( + backup_dir, 'backups', 'node', + backup_id, 'database', 'database_map') + with open(path, "w") as f: + f.close() + + try: + self.validate_pb( + backup_dir, 'node', + options=["--db-include=db1"]) + self.assertEqual( + 1, 0, + "Expecting Error because database_map is empty.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "WARNING: Backup {0} data files are corrupted".format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # mangle database_map + with open(path, "w") as f: + f.write("42") + f.close() + + try: + self.validate_pb( + backup_dir, 'node', + options=["--db-include=db1"]) + self.assertEqual( + 1, 0, + "Expecting Error because database_map is empty.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "WARNING: Backup {0} data files are corrupted".format( + backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_partial_validate_exclude(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + # FULL backup + backup_id = self.backup_node(backup_dir, 'node', node) + + try: + self.validate_pb( + backup_dir, 'node', + options=[ + "--db-include=db1", + "--db-exclude=db2"]) + self.assertEqual( + 1, 0, + "Expecting Error because of 'db-exclude' and 'db-include'.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: You cannot specify '--db-include' " + "and '--db-exclude' together", e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + try: + self.validate_pb( + backup_dir, 'node', + options=[ + "--db-exclude=db1", + "--db-exclude=db5", + "--log-level-console=verbose"]) + self.assertEqual( + 1, 0, + "Expecting Error because of missing backup ID.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: You must specify parameter (-i, --backup-id) for partial validation", + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + output = self.validate_pb( + backup_dir, 'node', backup_id, + options=[ + "--db-exclude=db1", + "--db-exclude=db5", + 
"--log-level-console=verbose"]) + + self.assertIn( + "VERBOSE: Skip file validation due to partial restore", output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_partial_validate_include(self): + """ + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + for i in range(1, 10, 1): + node.safe_psql( + 'postgres', + 'CREATE database db{0}'.format(i)) + + # FULL backup + backup_id = self.backup_node(backup_dir, 'node', node) + + try: + self.validate_pb( + backup_dir, 'node', + options=[ + "--db-include=db1", + "--db-exclude=db2"]) + self.assertEqual( + 1, 0, + "Expecting Error because of 'db-exclude' and 'db-include'.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: You cannot specify '--db-include' " + "and '--db-exclude' together", e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + output = self.validate_pb( + backup_dir, 'node', backup_id, + options=[ + "--db-include=db1", + "--db-include=db5", + "--db-include=postgres", + "--log-level-console=verbose"]) + + self.assertIn( + "VERBOSE: Skip file validation due to partial restore", output) + + output = self.validate_pb( + backup_dir, 'node', backup_id, + options=["--log-level-console=verbose"]) + + self.assertNotIn( + "VERBOSE: Skip file validation due to partial restore", output) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_not_validate_diffenent_pg_version(self): + """Do not validate backup, if binary is compiled with different PG version""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + backup_id = self.backup_node(backup_dir, 'node', node) + + control_file = os.path.join( + backup_dir, "backups", "node", backup_id, + "backup.control") + + pg_version = node.major_version + + if pg_version.is_integer(): + pg_version = int(pg_version) + + fake_new_pg_version = pg_version + 1 + + with open(control_file, 'r') as f: + data = f.read(); + + data = data.replace(str(pg_version), str(fake_new_pg_version)) + + with open(control_file, 'w') as f: + f.write(data); + + try: + self.validate_pb(backup_dir) + self.assertEqual( + 1, 0, + "Expecting Error because validation is forbidden if server version of backup " + "is different from the server version of pg_probackup.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + "ERROR: Backup {0} has server version".format(backup_id), + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_validate_corrupt_page_header_map(self): + """ + Check that corruption in page_header_map is 
detected + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + ok_1 = self.backup_node(backup_dir, 'node', node, options=['--stream']) + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + ok_2 = self.backup_node(backup_dir, 'node', node, options=['--stream']) + + page_header_map = os.path.join( + backup_dir, 'backups', 'node', backup_id, 'page_header_map') + + # Corrupt tablespace_map file in FULL backup + with open(page_header_map, "rb+", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + try: + self.validate_pb(backup_dir, 'node', backup_id=backup_id) + self.assertEqual( + 1, 0, + "Expecting Error because page_header is corrupted.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'WARNING: An error occured during metadata decompression' in e.message and + 'data error' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn("Backup {0} is corrupt".format(backup_id), e.message) + + try: + self.validate_pb(backup_dir) + self.assertEqual( + 1, 0, + "Expecting Error because page_header is corrupted.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'WARNING: An error occured during metadata decompression' in e.message and + 'data error' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + self.assertIn("INFO: Backup {0} data files are valid".format(ok_1), e.message) + self.assertIn("WARNING: Backup {0} data files are corrupted".format(backup_id), e.message) + self.assertIn("INFO: Backup {0} data files are valid".format(ok_2), e.message) + + self.assertIn("WARNING: Some backups are not valid", e.message) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_validate_truncated_page_header_map(self): + """ + Check that corruption in page_header_map is detected + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + ok_1 = self.backup_node(backup_dir, 'node', node, options=['--stream']) + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + ok_2 = self.backup_node(backup_dir, 'node', node, options=['--stream']) + + page_header_map = os.path.join( + backup_dir, 'backups', 'node', backup_id, 'page_header_map') + + # truncate page_header_map file + with open(page_header_map, "rb+", 0) as f: + f.truncate(121) + f.flush() + f.close + + try: + self.validate_pb(backup_dir, 'node', backup_id=backup_id) + self.assertEqual( + 1, 0, + "Expecting Error because page_header is corrupted.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Backup {0} is corrupt'.format(backup_id), 
e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + try: + self.validate_pb(backup_dir) + self.assertEqual( + 1, 0, + "Expecting Error because page_header is corrupted.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn("INFO: Backup {0} data files are valid".format(ok_1), e.message) + self.assertIn("WARNING: Backup {0} data files are corrupted".format(backup_id), e.message) + self.assertIn("INFO: Backup {0} data files are valid".format(ok_2), e.message) + self.assertIn("WARNING: Some backups are not valid", e.message) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_validate_missing_page_header_map(self): + """ + Check that corruption in page_header_map is detected + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + ok_1 = self.backup_node(backup_dir, 'node', node, options=['--stream']) + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + ok_2 = self.backup_node(backup_dir, 'node', node, options=['--stream']) + + page_header_map = os.path.join( + backup_dir, 'backups', 'node', backup_id, 'page_header_map') + + # unlink page_header_map file + os.remove(page_header_map) + + try: + self.validate_pb(backup_dir, 'node', backup_id=backup_id) + self.assertEqual( + 1, 0, + "Expecting Error because page_header is corrupted.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: Backup {0} is corrupt'.format(backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + try: + self.validate_pb(backup_dir) + self.assertEqual( + 1, 0, + "Expecting Error because page_header is corrupted.\n " + "Output: {0} \n CMD: {1}".format( + self.output, self.cmd)) + except ProbackupException as e: + self.assertIn("INFO: Backup {0} data files are valid".format(ok_1), e.message) + self.assertIn("WARNING: Backup {0} data files are corrupted".format(backup_id), e.message) + self.assertIn("INFO: Backup {0} data files are valid".format(ok_2), e.message) + self.assertIn("WARNING: Some backups are not valid", e.message) + + # Clean after yourself + self.del_test_dir(module_name, fname, [node]) + +# validate empty backup list +# page from future during validate +# page from future during backup + +# corrupt block, so file become unaligned: +# 712 Assert(header.compressed_size <= BLCKSZ); +# 713 +# 714 read_len = fread(compressed_page.data, 1, +# 715 MAXALIGN(header.compressed_size), in); +# 716 if (read_len != MAXALIGN(header.compressed_size)) +# -> 717 elog(ERROR, "cannot read block %u of \"%s\" read %lu of %d", +# 718 blknum, file->path, read_len, header.compressed_size); \ No newline at end of file diff --git a/tests/validate_test.py b/tests/validate_test.py deleted file mode 100644 index ab091c578..000000000 --- a/tests/validate_test.py +++ /dev/null @@ -1,1730 +0,0 @@ -import os -import unittest -from .helpers.ptrack_helpers import ProbackupTest, ProbackupException -from datetime import datetime, timedelta -import subprocess -from sys 
import exit -import time - - -module_name = 'validate' - - -class ValidateTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_validate_wal_unreal_values(self): - """ - make node with archiving, make archive backup - validate to both real and unreal values - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - with node.connect("postgres") as con: - con.execute("CREATE TABLE tbl0005 (a text)") - con.commit() - - backup_id = self.backup_node(backup_dir, 'node', node) - - node.pgbench_init(scale=2) - pgbench = node.pgbench( - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - options=["-c", "4", "-T", "10"] - ) - - pgbench.wait() - pgbench.stdout.close() - - target_time = self.show_pb( - backup_dir, 'node', backup_id)['recovery-time'] - after_backup_time = datetime.now().replace(second=0, microsecond=0) - - # Validate to real time - self.assertIn( - "INFO: backup validation completed successfully", - self.validate_pb( - backup_dir, 'node', - options=["--time={0}".format(target_time)]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - # Validate to unreal time - unreal_time_1 = after_backup_time - timedelta(days=2) - try: - self.validate_pb( - backup_dir, 'node', options=["--time={0}".format( - unreal_time_1)]) - self.assertEqual( - 1, 0, - "Expecting Error because of validation to unreal time.\n " - "Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertEqual( - e.message, - 'ERROR: Full backup satisfying target options is not found.\n', - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - # Validate to unreal time #2 - unreal_time_2 = after_backup_time + timedelta(days=2) - try: - self.validate_pb(backup_dir, 'node', options=["--time={0}".format(unreal_time_2)]) - self.assertEqual(1, 0, "Expecting Error because of validation to unreal time.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue('ERROR: not enough WAL records to time' in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - - # Validate to real xid - target_xid = None - with node.connect("postgres") as con: - res = con.execute("INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") - con.commit() - target_xid = res[0][0] - self.switch_wal_segment(node) - - self.assertIn("INFO: backup validation completed successfully", - self.validate_pb(backup_dir, 'node', options=["--xid={0}".format(target_xid)]), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) - - # Validate to unreal xid - unreal_xid = int(target_xid) + 1000 - try: - self.validate_pb(backup_dir, 'node', options=["--xid={0}".format(unreal_xid)]) - self.assertEqual(1, 0, "Expecting Error because of validation to unreal xid.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue('ERROR: not enough WAL records to xid' in e.message, - '\n Unexpected Error Message: {0}\n CMD: 
{1}'.format(repr(e.message), self.cmd)) - - # Validate with backup ID - self.assertIn("INFO: Validating backup {0}".format(backup_id), - self.validate_pb(backup_dir, 'node', backup_id), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) - self.assertIn("INFO: Backup {0} data files are valid".format(backup_id), - self.validate_pb(backup_dir, 'node', backup_id), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) - self.assertIn("INFO: Backup {0} WAL segments are valid".format(backup_id), - self.validate_pb(backup_dir, 'node', backup_id), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) - self.assertIn("INFO: Backup {0} is valid".format(backup_id), - self.validate_pb(backup_dir, 'node', backup_id), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) - self.assertIn("INFO: Validate of backup {0} completed".format(backup_id), - self.validate_pb(backup_dir, 'node', backup_id), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_corrupted_intermediate_backup(self): - """make archive node, take FULL, PAGE1, PAGE2 backups, corrupt file in PAGE1 backup, - run validate on PAGE1, expect PAGE1 to gain status CORRUPT and PAGE2 get status ORPHAN""" - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - # FULL - backup_id_1 = self.backup_node(backup_dir, 'node', node) - - node.safe_psql( - "postgres", - "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - file_path = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() - # PAGE1 - backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(10000,20000) i") - # PAGE2 - backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # Corrupt some file - file = os.path.join(backup_dir, 'backups/node', backup_id_2, 'database', file_path) - with open(file, "rb+", 0) as f: - f.seek(42) - f.write(b"blah") - f.flush() - f.close - - # Simple validate - try: - self.validate_pb(backup_dir, 'node', backup_id=backup_id_2, - options=['--log-level-file=verbose']) - self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - 'INFO: Validating parents for backup {0}'.format(backup_id_2) in e.message - and 'ERROR: Backup {0} is corrupt'.format(backup_id_2) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - - self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "CORRUPT"') - self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"') - - # 
Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_corrupted_intermediate_backups(self): - """make archive node, take FULL, PAGE1, PAGE2 backups, - corrupt file in FULL and PAGE1 backupd, run validate on PAGE1, - expect FULL and PAGE1 to gain status CORRUPT and PAGE2 get status ORPHAN""" - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.safe_psql( - "postgres", - "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - file_path_t_heap = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() - # FULL - backup_id_1 = self.backup_node(backup_dir, 'node', node) - - node.safe_psql( - "postgres", - "create table t_heap_1 as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - file_path_t_heap_1 = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap_1')").rstrip() - # PAGE1 - backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i") - # PAGE2 - backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # Corrupt some file in FULL backup - file_full = os.path.join(backup_dir, 'backups/node', backup_id_1, 'database', file_path_t_heap) - with open(file_full, "rb+", 0) as f: - f.seek(84) - f.write(b"blah") - f.flush() - f.close - - # Corrupt some file in PAGE1 backup - file_page1 = os.path.join(backup_dir, 'backups/node', backup_id_2, 'database', file_path_t_heap_1) - with open(file_page1, "rb+", 0) as f: - f.seek(42) - f.write(b"blah") - f.flush() - f.close - - # Validate PAGE1 - try: - self.validate_pb(backup_dir, 'node', backup_id=backup_id_2, - options=['--log-level-file=verbose']) - self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue('INFO: Validating parents for backup {0}'.format(backup_id_2) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format(backup_id_1) in e.message - and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message - and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertTrue( - 'WARNING: Backup {0} is orphaned because his parent'.format(backup_id_2) in e.message - and 'WARNING: Backup {0} is orphaned because his parent'.format(backup_id_3) in e.message - and 'ERROR: Backup {0} is orphan.'.format(backup_id_2) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - - self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"') - self.assertEqual('ORPHAN', 
self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"') - self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_corrupted_intermediate_backups_1(self): - """make archive node, take FULL1, PAGE1, PAGE2, PAGE3, PAGE4, PAGE5, FULL2 backups, - corrupt file in PAGE1 and PAGE4, run validate on PAGE3, - expect PAGE1 to gain status CORRUPT, PAGE2, PAGE3, PAGE4 and PAGE5 to gain status ORPHAN""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - # FULL1 - backup_id_1 = self.backup_node(backup_dir, 'node', node) - - # PAGE1 - node.safe_psql( - "postgres", - "create table t_heap as select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,10000) i") - backup_id_2 = self.backup_node( - backup_dir, 'node', node, backup_type='page') - - # PAGE2 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,10000) i") - file_page_2 = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() - backup_id_3 = self.backup_node( - backup_dir, 'node', node, backup_type='page') - - # PAGE3 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(10000,20000) i") - backup_id_4 = self.backup_node( - backup_dir, 'node', node, backup_type='page') - - # PAGE4 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(20000,30000) i") - backup_id_5 = self.backup_node( - backup_dir, 'node', node, backup_type='page') - - # PAGE5 - node.safe_psql( - "postgres", - "create table t_heap1 as select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,10000) i") - file_page_5 = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap1')").rstrip() - backup_id_6 = self.backup_node( - backup_dir, 'node', node, backup_type='page') - - # PAGE6 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(30000,40000) i") - backup_id_7 = self.backup_node( - backup_dir, 'node', node, backup_type='page') - - # FULL2 - backup_id_8 = self.backup_node(backup_dir, 'node', node) - - # Corrupt some file in PAGE2 and PAGE5 backups - file_page1 = os.path.join( - backup_dir, 'backups/node', backup_id_3, 'database', file_page_2) - with open(file_page1, "rb+", 0) as f: - f.seek(84) - f.write(b"blah") - f.flush() - f.close - - file_page4 = os.path.join( - backup_dir, 'backups/node', backup_id_6, 'database', file_page_5) - with open(file_page4, "rb+", 0) as f: - f.seek(42) - f.write(b"blah") - f.flush() - f.close - - # Validate PAGE3 - try: - self.validate_pb( - backup_dir, 'node', - 
backup_id=backup_id_4, - options=['--log-level-file=verbose']) - self.assertEqual( - 1, 0, - "Expecting Error because of data files corruption.\n" - " Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - 'INFO: Validating parents for backup {0}'.format( - backup_id_4) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format( - backup_id_1) in e.message and - 'INFO: Backup {0} data files are valid'.format( - backup_id_1) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format( - backup_id_2) in e.message and - 'INFO: Backup {0} data files are valid'.format( - backup_id_2) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format( - backup_id_3) in e.message and - 'WARNING: Invalid CRC of backup file "{0}"'.format( - file_page1) in e.message and - 'WARNING: Backup {0} data files are corrupted'.format( - backup_id_3) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'WARNING: Backup {0} is orphaned because ' - 'his parent {1} is corrupted'.format( - backup_id_4, backup_id_3) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'WARNING: Backup {0} is orphaned because ' - 'his parent {1} is corrupted'.format( - backup_id_5, backup_id_3) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'WARNING: Backup {0} is orphaned because ' - 'his parent {1} is corrupted'.format( - backup_id_6, backup_id_3) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'WARNING: Backup {0} is orphaned because ' - 'his parent {1} is corrupted'.format( - backup_id_7, backup_id_3) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'ERROR: Backup {0} is orphan'.format(backup_id_4) in e.message, - '\n Unexpected Error Message: {0}\n ' - 'CMD: {1}'.format(repr(e.message), self.cmd)) - - self.assertEqual( - 'OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], - 'Backup STATUS should be "OK"') - self.assertEqual( - 'OK', self.show_pb(backup_dir, 'node', backup_id_2)['status'], - 'Backup STATUS should be "OK"') - self.assertEqual( - 'CORRUPT', self.show_pb(backup_dir, 'node', backup_id_3)['status'], - 'Backup STATUS should be "CORRUPT"') - self.assertEqual( - 'ORPHAN', self.show_pb(backup_dir, 'node', backup_id_4)['status'], - 'Backup STATUS should be "ORPHAN"') - self.assertEqual( - 'ORPHAN', self.show_pb(backup_dir, 'node', backup_id_5)['status'], - 'Backup STATUS should be "ORPHAN"') - self.assertEqual( - 'ORPHAN', self.show_pb(backup_dir, 'node', backup_id_6)['status'], - 'Backup STATUS should be "ORPHAN"') - self.assertEqual( - 'ORPHAN', self.show_pb(backup_dir, 'node', backup_id_7)['status'], - 'Backup STATUS should be "ORPHAN"') - self.assertEqual( - 'OK', self.show_pb(backup_dir, 'node', backup_id_8)['status'], - 'Backup STATUS should be "OK"') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def 
test_validate_specific_target_corrupted_intermediate_backups(self): - """make archive node, take FULL1, PAGE1, PAGE2, PAGE3, PAGE4, PAGE5, FULL2 backups, - corrupt file in PAGE1 and PAGE4, run validate on PAGE3 to specific xid, - expect PAGE1 to gain status CORRUPT, PAGE2, PAGE3, PAGE4 and PAGE5 to gain status ORPHAN""" - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - # FULL1 - backup_id_1 = self.backup_node(backup_dir, 'node', node) - - # PAGE1 - node.safe_psql( - "postgres", - "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # PAGE2 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - file_page_2 = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() - backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # PAGE3 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(10000,20000) i") - backup_id_4 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # PAGE4 - target_xid = node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i RETURNING (xmin)")[0][0] - backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # PAGE5 - node.safe_psql( - "postgres", - "create table t_heap1 as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - file_page_5 = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap1')").rstrip() - backup_id_6 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # PAGE6 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i") - backup_id_7 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # FULL2 - backup_id_8 = self.backup_node(backup_dir, 'node', node) - - # Corrupt some file in PAGE2 and PAGE5 backups - file_page1 = os.path.join(backup_dir, 'backups/node', backup_id_3, 'database', file_page_2) - with open(file_page1, "rb+", 0) as f: - f.seek(84) - f.write(b"blah") - f.flush() - f.close - - file_page4 = os.path.join(backup_dir, 'backups/node', backup_id_6, 'database', file_page_5) - with open(file_page4, "rb+", 0) as f: - f.seek(42) - f.write(b"blah") - f.flush() - f.close - - # Validate PAGE3 - try: - self.validate_pb(backup_dir, 'node', - options=['--log-level-file=verbose', '-i', backup_id_4, '--xid={0}'.format(target_xid)]) - self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - 'INFO: Validating parents for backup {0}'.format(backup_id_4) in 
e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format(backup_id_1) in e.message - and 'INFO: Backup {0} data files are valid'.format(backup_id_1) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format(backup_id_2) in e.message - and 'INFO: Backup {0} data files are valid'.format(backup_id_2) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format(backup_id_3) in e.message - and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_page1) in e.message - and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_3) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertTrue( - 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_4, backup_id_3) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertTrue( - 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_5, backup_id_3) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertTrue( - 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_6, backup_id_3) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertTrue( - 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_7, backup_id_3) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertTrue( - 'ERROR: Backup {0} is orphan'.format(backup_id_4) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - - self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "OK"') - self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "OK"') - self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "CORRUPT"') - self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "ORPHAN"') - self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "ORPHAN"') - self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_6)['status'], 'Backup STATUS should be "ORPHAN"') - self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_7)['status'], 'Backup STATUS should be "ORPHAN"') - self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_8)['status'], 'Backup STATUS should be "OK"') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_instance_with_corrupted_page(self): - """make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups, - corrupt file in PAGE1 backup and run validate on instance, - expect PAGE1 to gain status CORRUPT, PAGE2 to gain status ORPHAN""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 
'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.safe_psql( - "postgres", - "create table t_heap as select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,10000) i") - # FULL1 - backup_id_1 = self.backup_node(backup_dir, 'node', node) - - node.safe_psql( - "postgres", - "create table t_heap1 as select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,10000) i") - file_path_t_heap1 = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap1')").rstrip() - # PAGE1 - backup_id_2 = self.backup_node( - backup_dir, 'node', node, backup_type='page') - - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(20000,30000) i") - # PAGE2 - backup_id_3 = self.backup_node( - backup_dir, 'node', node, backup_type='page') - # FULL1 - backup_id_4 = self.backup_node( - backup_dir, 'node', node) - # PAGE3 - backup_id_5 = self.backup_node( - backup_dir, 'node', node, backup_type='page') - - # Corrupt some file in FULL backup - file_full = os.path.join( - backup_dir, 'backups/node', backup_id_2, - 'database', file_path_t_heap1) - with open(file_full, "rb+", 0) as f: - f.seek(84) - f.write(b"blah") - f.flush() - f.close - - # Validate Instance - try: - self.validate_pb( - backup_dir, 'node', options=['--log-level-file=verbose']) - self.assertEqual( - 1, 0, - "Expecting Error because of data files corruption.\n " - "Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - "INFO: Validate backups of the instance 'node'" in e.message, - "\n Unexpected Error Message: {0}\n " - "CMD: {1}".format(repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format( - backup_id_5) in e.message and - 'INFO: Backup {0} data files are valid'.format( - backup_id_5) in e.message and - 'INFO: Backup {0} WAL segments are valid'.format( - backup_id_5) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format( - backup_id_4) in e.message and - 'INFO: Backup {0} data files are valid'.format( - backup_id_4) in e.message and - 'INFO: Backup {0} WAL segments are valid'.format( - backup_id_4) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format( - backup_id_3) in e.message and - 'INFO: Backup {0} data files are valid'.format( - backup_id_3) in e.message and - 'INFO: Backup {0} WAL segments are valid'.format( - backup_id_3) in e.message and - 'WARNING: Backup {0} is orphaned because ' - 'his parent {1} is corrupted'.format( - backup_id_3, backup_id_2) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format( - backup_id_2) in e.message and - 'WARNING: Invalid CRC of backup file "{0}"'.format( - file_full) in e.message and - 'WARNING: Backup {0} data files are corrupted'.format( - backup_id_2) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'INFO: Validating backup {0}'.format( - backup_id_1) in e.message and - 'INFO: Backup {0} data 
files are valid'.format( - backup_id_1) in e.message and - 'INFO: Backup {0} WAL segments are valid'.format( - backup_id_1) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertTrue( - 'WARNING: Some backups are not valid' in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - self.assertEqual( - 'OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], - 'Backup STATUS should be "OK"') - self.assertEqual( - 'CORRUPT', self.show_pb(backup_dir, 'node', backup_id_2)['status'], - 'Backup STATUS should be "CORRUPT"') - self.assertEqual( - 'ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], - 'Backup STATUS should be "ORPHAN"') - self.assertEqual( - 'OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], - 'Backup STATUS should be "OK"') - self.assertEqual( - 'OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], - 'Backup STATUS should be "OK"') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_instance_with_corrupted_full_and_try_restore(self): - """make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups, - corrupt file in FULL backup and run validate on instance, - expect FULL to gain status CORRUPT, PAGE1 and PAGE2 to gain status ORPHAN, - try to restore backup with --no-validation option""" - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.safe_psql( - "postgres", - "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - file_path_t_heap = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() - # FULL1 - backup_id_1 = self.backup_node(backup_dir, 'node', node) - - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - # PAGE1 - backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # PAGE2 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i") - backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # FULL1 - backup_id_4 = self.backup_node(backup_dir, 'node', node) - - # PAGE3 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i") - backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # Corrupt some file in FULL backup - file_full = os.path.join(backup_dir, 'backups/node', backup_id_1, 'database', file_path_t_heap) - with open(file_full, "rb+", 0) as f: - f.seek(84) - f.write(b"blah") - f.flush() - f.close - - # Validate Instance - try: - self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose']) - self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except 
ProbackupException as e: - self.assertTrue( - 'INFO: Validating backup {0}'.format(backup_id_1) in e.message - and "INFO: Validate backups of the instance 'node'" in e.message - and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message - and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - - self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"') - self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"') - self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"') - self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"') - self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"') - - node.cleanup() - restore_out = self.restore_node( - backup_dir, 'node', node, - options=["--no-validate"]) - self.assertIn( - "INFO: Restore of backup {0} completed.".format(backup_id_5), - restore_out, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(self.output), self.cmd)) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_instance_with_corrupted_full(self): - """make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups, - corrupt file in FULL backup and run validate on instance, - expect FULL to gain status CORRUPT, PAGE1 and PAGE2 to gain status ORPHAN""" - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.safe_psql( - "postgres", - "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - file_path_t_heap = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() - # FULL1 - backup_id_1 = self.backup_node(backup_dir, 'node', node) - - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - # PAGE1 - backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # PAGE2 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i") - backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # FULL1 - backup_id_4 = self.backup_node(backup_dir, 'node', node) - - # PAGE3 - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i") - backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page') - - # Corrupt some file in FULL backup - file_full = os.path.join(backup_dir, 'backups/node', backup_id_1, 'database', file_path_t_heap) - with open(file_full, "rb+", 0) as f: - f.seek(84) - f.write(b"blah") - f.flush() - f.close - - # Validate 
Instance - try: - self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose']) - self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - 'INFO: Validating backup {0}'.format(backup_id_1) in e.message - and "INFO: Validate backups of the instance 'node'" in e.message - and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message - and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - - self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"') - self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"') - self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"') - self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"') - self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_corrupt_wal_1(self): - """make archive node, take FULL1, PAGE1,PAGE2,FULL2,PAGE3,PAGE4 backups, corrupt all wal files, run validate, expect errors""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id_1 = self.backup_node(backup_dir, 'node', node) - - with node.connect("postgres") as con: - con.execute("CREATE TABLE tbl0005 (a text)") - con.commit() - - backup_id_2 = self.backup_node(backup_dir, 'node', node) - - # Corrupt WAL - wals_dir = os.path.join(backup_dir, 'wal', 'node') - wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] - wals.sort() - for wal in wals: - with open(os.path.join(wals_dir, wal), "rb+", 0) as f: - f.seek(42) - f.write(b"blablablaadssaaaaaaaaaaaaaaa") - f.flush() - f.close - - # Simple validate - try: - self.validate_pb(backup_dir, 'node') - self.assertEqual( - 1, 0, - "Expecting Error because of wal segments corruption.\n" - " Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - 'WARNING: Backup' in e.message and - 'WAL segments are corrupted' in e.message and - "WARNING: There are not enough WAL " - "records to consistenly restore backup" in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - self.assertEqual( - 'CORRUPT', - self.show_pb(backup_dir, 'node', backup_id_1)['status'], - 'Backup STATUS should be "CORRUPT"') - self.assertEqual( - 'CORRUPT', - self.show_pb(backup_dir, 'node', backup_id_2)['status'], - 'Backup STATUS should be "CORRUPT"') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_corrupt_wal_2(self): - """make archive node, make full backup, corrupt all wal files, run validate to 
real xid, expect errors""" - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - with node.connect("postgres") as con: - con.execute("CREATE TABLE tbl0005 (a text)") - con.commit() - - backup_id = self.backup_node(backup_dir, 'node', node) - target_xid = None - with node.connect("postgres") as con: - res = con.execute( - "INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") - con.commit() - target_xid = res[0][0] - - # Corrupt WAL - wals_dir = os.path.join(backup_dir, 'wal', 'node') - wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] - wals.sort() - for wal in wals: - with open(os.path.join(wals_dir, wal), "rb+", 0) as f: - f.seek(128) - f.write(b"blablablaadssaaaaaaaaaaaaaaa") - f.flush() - f.close - - # Validate to xid - try: - self.validate_pb( - backup_dir, - 'node', - backup_id, - options=[ - "--log-level-console=verbose", - "--xid={0}".format(target_xid)]) - self.assertEqual( - 1, 0, - "Expecting Error because of wal segments corruption.\n" - " Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - 'WARNING: Backup' in e.message and - 'WAL segments are corrupted' in e.message and - "WARNING: There are not enough WAL " - "records to consistenly restore backup" in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - self.assertEqual( - 'CORRUPT', - self.show_pb(backup_dir, 'node', backup_id)['status'], - 'Backup STATUS should be "CORRUPT"') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_wal_lost_segment_1(self): - """make archive node, make archive full backup, - delete from archive wal segment which belong to previous backup - run validate, expecting error because of missing wal segment - make sure that backup status is 'CORRUPT' - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - node.pgbench_init(scale=2) - pgbench = node.pgbench( - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - options=["-c", "4", "-T", "10"] - ) - pgbench.wait() - pgbench.stdout.close() - backup_id = self.backup_node(backup_dir, 'node', node) - - # Delete wal segment - wals_dir = os.path.join(backup_dir, 'wal', 'node') - wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] - wals.sort() - file = os.path.join(backup_dir, 'wal', 'node', wals[-1]) - os.remove(file) - - # cut out '.gz' - if self.archive_compress: - file = file[:-3] - - try: - self.validate_pb(backup_dir, 'node') - self.assertEqual( - 1, 0, - "Expecting Error because of wal segment disappearance.\n" - " Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertTrue( - 
"WARNING: WAL segment \"{0}\" is absent".format( - file) in e.message and - "WARNING: There are not enough WAL records to consistenly " - "restore backup {0}".format(backup_id) in e.message and - "WARNING: Backup {0} WAL segments are corrupted".format( - backup_id) in e.message and - "WARNING: Some backups are not valid" in e.message, - "\n Unexpected Error Message: {0}\n CMD: {1}".format( - repr(e.message), self.cmd)) - - self.assertEqual( - 'CORRUPT', - self.show_pb(backup_dir, 'node', backup_id)['status'], - 'Backup {0} should have STATUS "CORRUPT"') - - # Run validate again - try: - self.validate_pb(backup_dir, 'node', backup_id) - self.assertEqual( - 1, 0, - "Expecting Error because of backup corruption.\n" - " Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - self.assertIn( - 'INFO: Revalidating backup {0}'.format(backup_id), e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertIn( - 'ERROR: Backup {0} is corrupt.'.format(backup_id), e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_corrupt_wal_between_backups(self): - """ - make archive node, make full backup, corrupt all wal files, - run validate to real xid, expect errors - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node(backup_dir, 'node', node) - - # make some wals - node.pgbench_init(scale=2) - pgbench = node.pgbench( - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - options=["-c", "4", "-T", "10"] - ) - pgbench.wait() - pgbench.stdout.close() - - with node.connect("postgres") as con: - con.execute("CREATE TABLE tbl0005 (a text)") - con.commit() - - with node.connect("postgres") as con: - res = con.execute( - "INSERT INTO tbl0005 VALUES ('inserted') RETURNING (xmin)") - con.commit() - target_xid = res[0][0] - - if self.get_version(node) < self.version_to_num('10.0'): - walfile = node.safe_psql( - 'postgres', - 'select pg_xlogfile_name(pg_current_xlog_location())').rstrip() - else: - walfile = node.safe_psql( - 'postgres', - 'select pg_walfile_name(pg_current_wal_lsn())').rstrip() - - if self.archive_compress: - walfile = walfile + '.gz' - self.switch_wal_segment(node) - - # generate some wals - node.pgbench_init(scale=2) - pgbench = node.pgbench( - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - options=["-c", "4", "-T", "10"] - ) - pgbench.wait() - pgbench.stdout.close() - - self.backup_node(backup_dir, 'node', node) - - # Corrupt WAL - wals_dir = os.path.join(backup_dir, 'wal', 'node') - with open(os.path.join(wals_dir, walfile), "rb+", 0) as f: - f.seek(9000) - f.write(b"b") - f.flush() - f.close - - # Validate to xid - try: - self.validate_pb( - backup_dir, - 'node', - backup_id, - options=[ - "--log-level-console=verbose", - "--xid={0}".format(target_xid)]) - self.assertEqual( - 1, 0, - "Expecting Error because of wal segments corruption.\n" - " Output: {0} \n CMD: {1}".format( - repr(self.output), self.cmd)) - except ProbackupException as e: - 
self.assertTrue( - 'ERROR: not enough WAL records to xid' in e.message and - 'WARNING: recovery can be done up to time' in e.message and - "ERROR: not enough WAL records to xid {0}\n".format( - target_xid), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - self.assertEqual( - 'OK', - self.show_pb(backup_dir, 'node')[0]['status'], - 'Backup STATUS should be "OK"') - - self.assertEqual( - 'OK', - self.show_pb(backup_dir, 'node')[1]['status'], - 'Backup STATUS should be "OK"') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_wal_lost_segment_2(self): - """ - make node with archiving - make archive backup - delete from archive wal segment which DO NOT belong to this backup - run validate, expecting error because of missing wal segment - make sure that backup status is 'ERROR' - """ - fname = self.id().split('.')[3] - node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - self.backup_node(backup_dir, 'node', node) - - # make some wals - node.pgbench_init(scale=2) - pgbench = node.pgbench( - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - options=["-c", "4", "-T", "10"] - ) - pgbench.wait() - pgbench.stdout.close() - - # delete last wal segment - wals_dir = os.path.join(backup_dir, 'wal', 'node') - wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join( - wals_dir, f)) and not f.endswith('.backup')] - wals = map(str, wals) - file = os.path.join(wals_dir, max(wals)) - os.remove(file) - if self.archive_compress: - file = file[:-3] - - # Try to restore - try: - backup_id = self.backup_node( - backup_dir, 'node', node, backup_type='page') - self.assertEqual( - 1, 0, - "Expecting Error because of wal segment disappearance.\n " - "Output: {0} \n CMD: {1}".format( - self.output, self.cmd)) - except ProbackupException as e: - self.assertTrue( - 'INFO: Wait for LSN' in e.message and - 'in archived WAL segment' in e.message and - 'WARNING: could not read WAL record at' in e.message and - 'ERROR: WAL segment "{0}" is absent\n'.format( - file) in e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - self.assertEqual( - 'ERROR', - self.show_pb(backup_dir, 'node')[1]['status'], - 'Backup {0} should have STATUS "ERROR"') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_pgpro702_688(self): - """make node without archiving, make stream backup, get Recovery Time, validate to Recovery Time""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node( - backup_dir, 'node', node, options=["--stream"]) - recovery_time = self.show_pb( - backup_dir, 'node', backup_id=backup_id)['recovery-time'] - - try: - self.validate_pb( - backup_dir, 'node', - options=["--time={0}".format(recovery_time)]) - 
self.assertEqual( - 1, 0, - "Expecting Error because of wal segment disappearance.\n " - "Output: {0} \n CMD: {1}".format( - self.output, self.cmd)) - except ProbackupException as e: - self.assertIn( - 'WAL archive is empty. You cannot restore backup to a ' - 'recovery target without WAL archive', e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_pgpro688(self): - """make node with archiving, make backup, get Recovery Time, validate to Recovery Time. Waiting PGPRO-688. RESOLVED""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - backup_id = self.backup_node(backup_dir, 'node', node) - recovery_time = self.show_pb(backup_dir, 'node', backup_id)['recovery-time'] - - self.validate_pb(backup_dir, 'node', options=["--time={0}".format(recovery_time)]) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_pgpro561(self): - """ - make node with archiving, make stream backup, - restore it to node1, check that archiving is not successful on node1 - """ - fname = self.id().split('.')[3] - node1 = self.make_simple_node( - base_dir="{0}/{1}/node1".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node1', node1) - self.set_archiving(backup_dir, 'node1', node1) - node1.start() - - backup_id = self.backup_node( - backup_dir, 'node1', node1, options=["--stream"]) - - node2 = self.make_simple_node( - base_dir="{0}/{1}/node2".format(module_name, fname)) - node2.cleanup() - - node1.psql( - "postgres", - "create table t_heap as select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,256) i") - - self.backup_node( - backup_dir, 'node1', node1, - backup_type='page', options=["--stream"]) - self.restore_node(backup_dir, 'node1', data_dir=node2.data_dir) - node2.append_conf( - 'postgresql.auto.conf', 'port = {0}'.format(node2.port)) - node2.slow_start() - - timeline_node1 = node1.get_control_data()["Latest checkpoint's TimeLineID"] - timeline_node2 = node2.get_control_data()["Latest checkpoint's TimeLineID"] - self.assertEqual( - timeline_node1, timeline_node2, - "Timelines on Master and Node1 should be equal. " - "This is unexpected") - - archive_command_node1 = node1.safe_psql( - "postgres", "show archive_command") - archive_command_node2 = node2.safe_psql( - "postgres", "show archive_command") - self.assertEqual( - archive_command_node1, archive_command_node2, - "Archive command on Master and Node should be equal. 
" - "This is unexpected") - - # result = node2.safe_psql("postgres", "select last_failed_wal from pg_stat_get_archiver() where last_failed_wal is not NULL") - ## self.assertEqual(res, six.b(""), 'Restored Node1 failed to archive segment {0} due to having the same archive command as Master'.format(res.rstrip())) - # if result == "": - # self.assertEqual(1, 0, 'Error is expected due to Master and Node1 having the common archive and archive_command') - - self.switch_wal_segment(node1) - self.switch_wal_segment(node2) - time.sleep(5) - - log_file = os.path.join(node2.logs_dir, 'postgresql.log') - with open(log_file, 'r') as f: - log_content = f.read() - self.assertTrue( - 'LOG: archive command failed with exit code 1' in log_content and - 'DETAIL: The failed archive command was:' in log_content and - 'INFO: pg_probackup archive-push from' in log_content, - 'Expecting error messages about failed archive_command' - ) - self.assertFalse( - 'pg_probackup archive-push completed successfully' in log_content) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_corrupted_full(self): - """ - make node with archiving, take full backup, and three page backups, - take another full backup and three page backups - corrupt second full backup, run validate, check that - second full backup became CORRUPT and his page backups are ORPHANs - remove corruption and run valudate again, check that - second full backup and his page backups are OK - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - self.backup_node(backup_dir, 'node', node) - self.backup_node(backup_dir, 'node', node, backup_type='page') - self.backup_node(backup_dir, 'node', node, backup_type='page') - - backup_id = self.backup_node(backup_dir, 'node', node) - self.backup_node(backup_dir, 'node', node, backup_type='page') - self.backup_node(backup_dir, 'node', node, backup_type='page') - - node.safe_psql( - "postgres", - "alter system set archive_command = 'false'") - node.reload() - try: - self.backup_node( - backup_dir, 'node', node, - backup_type='page', options=['--archive-timeout=1s']) - self.assertEqual( - 1, 0, - "Expecting Error because of data file dissapearance.\n " - "Output: {0} \n CMD: {1}".format( - self.output, self.cmd)) - except ProbackupException as e: - pass - self.assertTrue( - self.show_pb(backup_dir, 'node')[6]['status'] == 'ERROR') - self.set_archiving(backup_dir, 'node', node) - node.reload() - self.backup_node(backup_dir, 'node', node, backup_type='page') - - file = os.path.join( - backup_dir, 'backups', 'node', - backup_id, 'database', 'postgresql.auto.conf') - - file_new = os.path.join(backup_dir, 'postgresql.auto.conf') - os.rename(file, file_new) - - try: - self.validate_pb(backup_dir) - self.assertEqual( - 1, 0, - "Expecting Error because of data file dissapearance.\n " - "Output: {0} \n CMD: {1}".format( - self.output, self.cmd)) - except ProbackupException as e: - self.assertIn( - 'Validating backup {0}'.format(backup_id), e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertIn( - 'WARNING: 
Backup {0} data files are corrupted'.format( - backup_id), e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertIn( - 'WARNING: Some backups are not valid'.format( - backup_id), e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') - self.assertTrue( - self.show_pb(backup_dir, 'node')[3]['status'] == 'CORRUPT') - self.assertTrue( - self.show_pb(backup_dir, 'node')[4]['status'] == 'ORPHAN') - self.assertTrue( - self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') - self.assertTrue( - self.show_pb(backup_dir, 'node')[6]['status'] == 'ERROR') - self.assertTrue( - self.show_pb(backup_dir, 'node')[7]['status'] == 'ORPHAN') - - os.rename(file_new, file) - try: - self.validate_pb(backup_dir, options=['--log-level-file=verbose']) - except ProbackupException as e: - self.assertIn( - 'WARNING: Some backups are not valid'.format( - backup_id), e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'OK') - self.assertTrue( - self.show_pb(backup_dir, 'node')[6]['status'] == 'ERROR') - self.assertTrue(self.show_pb(backup_dir, 'node')[7]['status'] == 'OK') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_validate_corrupted_full_1(self): - """ - make node with archiving, take full backup, and three page backups, - take another full backup and four page backups - corrupt second full backup, run validate, check that - second full backup became CORRUPT and his page backups are ORPHANs - remove corruption from full backup and corrupt his second page backup - run valudate again, check that - second full backup and his firts page backups are OK, - second page should be CORRUPT - third page should be ORPHAN - """ - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - set_replication=True, - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.start() - - self.backup_node(backup_dir, 'node', node) - self.backup_node(backup_dir, 'node', node, backup_type='page') - self.backup_node(backup_dir, 'node', node, backup_type='page') - - backup_id = self.backup_node(backup_dir, 'node', node) - self.backup_node(backup_dir, 'node', node, backup_type='page') - backup_id_page = self.backup_node( - backup_dir, 'node', node, backup_type='page') - self.backup_node(backup_dir, 'node', node, backup_type='page') - - file = os.path.join( - backup_dir, 'backups', 'node', - backup_id, 'database', 'postgresql.auto.conf') - - file_new = os.path.join(backup_dir, 
'postgresql.auto.conf') - os.rename(file, file_new) - - try: - self.validate_pb(backup_dir) - self.assertEqual( - 1, 0, - "Expecting Error because of data file dissapearance.\n " - "Output: {0} \n CMD: {1}".format( - self.output, self.cmd)) - except ProbackupException as e: - self.assertIn( - 'Validating backup {0}'.format(backup_id), e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertIn( - 'WARNING: Backup {0} data files are corrupted'.format( - backup_id), e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - self.assertIn( - 'WARNING: Some backups are not valid'.format( - backup_id), e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'CORRUPT') - self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'ORPHAN') - self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'ORPHAN') - self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') - - os.rename(file_new, file) - file = os.path.join( - backup_dir, 'backups', 'node', - backup_id_page, 'database', 'postgresql.auto.conf') - - file_new = os.path.join(backup_dir, 'postgresql.auto.conf') - os.rename(file, file_new) - - try: - self.validate_pb(backup_dir, options=['--log-level-file=verbose']) - except ProbackupException as e: - self.assertIn( - 'WARNING: Some backups are not valid'.format( - backup_id), e.message, - '\n Unexpected Error Message: {0}\n CMD: {1}'.format( - repr(e.message), self.cmd)) - - self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[2]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[3]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[4]['status'] == 'OK') - self.assertTrue(self.show_pb(backup_dir, 'node')[5]['status'] == 'CORRUPT') - self.assertTrue(self.show_pb(backup_dir, 'node')[6]['status'] == 'ORPHAN') - - # Clean after yourself - self.del_test_dir(module_name, fname) - - def test_file_size_corruption_no_validate(self): - - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir="{0}/{1}/node".format(module_name, fname), - # initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - - node.start() - - node.safe_psql( - "postgres", - "create table t_heap as select 1 as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,1000) i") - node.safe_psql( - "postgres", - "CHECKPOINT;") - - heap_path = node.safe_psql( - "postgres", - "select pg_relation_filepath('t_heap')").rstrip() - heap_size = node.safe_psql( - "postgres", - "select pg_relation_size('t_heap')") - - backup_id = self.backup_node( - backup_dir, 'node', node, backup_type="full", - options=["-j", "4"], async=False, gdb=False) - - node.stop() - node.cleanup() - - # Let`s do file corruption - with open(os.path.join(backup_dir, 
"backups", 'node', backup_id, "database", heap_path), "rb+", 0) as f: - f.truncate(int(heap_size) - 4096) - f.flush() - f.close - - node.cleanup() - - try: - self.restore_node( - backup_dir, 'node', node, - options=["--no-validate"]) - except ProbackupException as e: - self.assertTrue("ERROR: Data files restoring failed" in e.message, repr(e.message)) - print "\nExpected error: \n" + e.message - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/travis/Dockerfile.in b/travis/Dockerfile.in new file mode 100644 index 000000000..a3c858ee2 --- /dev/null +++ b/travis/Dockerfile.in @@ -0,0 +1,28 @@ +FROM ololobus/postgres-dev:stretch + +USER root +RUN apt-get update +RUN apt-get -yq install python python-pip + +# RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py +# RUN python2 get-pip.py +RUN python2 -m pip install virtualenv + +# Environment +ENV PG_MAJOR=${PG_VERSION} PG_BRANCH=${PG_BRANCH} +ENV LANG=C.UTF-8 PGHOME=/pg/testdir/pgbin + +# Make directories +RUN mkdir -p /pg/testdir + +COPY run_tests.sh /run.sh +RUN chmod 755 /run.sh + +COPY . /pg/testdir +WORKDIR /pg/testdir + +# Grant privileges +RUN chown -R postgres:postgres /pg/testdir + +USER postgres +ENTRYPOINT MODE=${MODE} /run.sh diff --git a/travis/backup_restore.sh b/travis/backup_restore.sh index 7fe1cfd8f..b3c9df1ed 100644 --- a/travis/backup_restore.sh +++ b/travis/backup_restore.sh @@ -27,7 +27,7 @@ yum install -y postgresql95-devel make gcc readline-devel openssl-devel pam-deve make top_srcdir=postgresql-$PGVERSION make install top_srcdir=postgresql-$PGVERSION -# initalize cluster and database +# initialize cluster and database yum install -y postgresql95-server su postgres -c "/usr/pgsql-9.5/bin/initdb -D $PGDATA -k" cat < $PGDATA/pg_hba.conf diff --git a/travis/docker-compose.yml b/travis/docker-compose.yml new file mode 100644 index 000000000..471ab779f --- /dev/null +++ b/travis/docker-compose.yml @@ -0,0 +1,2 @@ +tests: + build: . diff --git a/travis/make_dockerfile.sh b/travis/make_dockerfile.sh new file mode 100755 index 000000000..3e6938bd9 --- /dev/null +++ b/travis/make_dockerfile.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env sh + +if [ -z ${PG_VERSION+x} ]; then + echo PG_VERSION is not set! + exit 1 +fi + +if [ -z ${PG_BRANCH+x} ]; then + echo PG_BRANCH is not set! 
+ exit 1 +fi + +if [ -z ${MODE+x} ]; then + MODE=basic +fi + +echo PG_VERSION=${PG_VERSION} +echo PG_BRANCH=${PG_BRANCH} +echo MODE=${MODE} + +sed \ + -e 's/${PG_VERSION}/'${PG_VERSION}/g \ + -e 's/${PG_BRANCH}/'${PG_BRANCH}/g \ + -e 's/${MODE}/'${MODE}/g \ +Dockerfile.in > Dockerfile diff --git a/travis/run_tests.sh b/travis/run_tests.sh new file mode 100755 index 000000000..1bb3a6fde --- /dev/null +++ b/travis/run_tests.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2019-2020, Postgres Professional +# + + +PG_SRC=$PWD/postgres + +# # Here PG_VERSION is provided by postgres:X-alpine docker image +# curl "https://ftp.postgresql.org/pub/source/v$PG_VERSION/postgresql-$PG_VERSION.tar.bz2" -o postgresql.tar.bz2 +# echo "$PG_SHA256 *postgresql.tar.bz2" | sha256sum -c - + +# mkdir $PG_SRC + +# tar \ +# --extract \ +# --file postgresql.tar.bz2 \ +# --directory $PG_SRC \ +# --strip-components 1 + +# Clone Postgres +echo "############### Getting Postgres sources:" +git clone https://github.com/postgres/postgres.git -b $PG_BRANCH --depth=1 + +# Compile and install Postgres +echo "############### Compiling Postgres:" +cd postgres # Go to postgres dir +./configure --prefix=$PGHOME --enable-debug --enable-cassert --enable-depend --enable-tap-tests +make -s -j$(nproc) install +make -s -j$(nproc) -C contrib/ install + +# Override default Postgres instance +export PATH=$PGHOME/bin:$PATH +export LD_LIBRARY_PATH=$PGHOME/lib +export PG_CONFIG=$(which pg_config) + +# Get amcheck if missing +if [ ! -d "contrib/amcheck" ]; then + echo "############### Getting missing amcheck:" + git clone https://github.com/petergeoghegan/amcheck.git --depth=1 contrib/amcheck + make USE_PGXS=1 -C contrib/amcheck install +fi + +# Get back to testdir +cd .. + +# Show pg_config path (just in case) +echo "############### pg_config path:" +which pg_config + +# Show pg_config just in case +echo "############### pg_config:" +pg_config + +# Build and install pg_probackup (using PG_CPPFLAGS and SHLIB_LINK for gcov) +echo "############### Compiling and installing pg_probackup:" +# make USE_PGXS=1 PG_CPPFLAGS="-coverage" SHLIB_LINK="-coverage" top_srcdir=$CUSTOM_PG_SRC install +make USE_PGXS=1 top_srcdir=$PG_SRC install + +# Setup python environment +echo "############### Setting up python env:" +python2 -m virtualenv pyenv +source pyenv/bin/activate +pip install testgres==1.8.2 + +echo "############### Testing:" +if [ "$MODE" = "basic" ]; then + export PG_PROBACKUP_TEST_BASIC=ON + python -m unittest -v tests + python -m unittest -v tests.init +else + python -m unittest -v tests.$MODE +fi + +# Generate *.gcov files +# gcov src/*.c src/*.h + +# Send coverage stats to Codecov +# bash <(curl -s https://codecov.io/bash) diff --git a/win32build.pl b/win32build.pl deleted file mode 100644 index 148641812..000000000 --- a/win32build.pl +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/perl -use JSON; -our $repack_version; -our $pgdir; -our $pgsrc; -if (@ARGV!=2) { - print STDERR "Usage $0 postgress-instalation-root pg-source-dir \n"; - exit 1; -} - - -our $liblist=""; - - -$pgdir = shift @ARGV; -$pgsrc = shift @ARGV if @ARGV; - - -our $arch = $ENV{'ARCH'} || "x64"; -$arch='Win32' if ($arch eq 'x86' || $arch eq 'X86'); -$arch='x64' if $arch eq 'X64'; - -$conffile = $pgsrc."/tools/msvc/config.pl"; - - -die 'Could not find config.pl' - unless (-f $conffile); - -our $config; -do $conffile; - - -if (! 
-d "$pgdir/bin" || !-d "$pgdir/include" || !-d "$pgdir/lib") { - print STDERR "Directory $pgdir doesn't look like root of postgresql installation\n"; - exit 1; -} -our $includepath=""; -our $libpath=""; -our $libpath32=""; -AddProject(); - -print "\n\n"; -print $libpath."\n"; -print $includepath."\n"; - -# open F,"<","META.json" or die "Cannot open META.json: $!\n"; -# { -# local $/ = undef; -# $decoded = decode_json(); -# $repack_version= $decoded->{'version'}; -# } - -# substitute new path in the project files - - - -preprocess_project("./msvs/template.pg_probackup.vcxproj","./msvs/pg_probackup.vcxproj"); - -exit 0; - - -sub preprocess_project { - my $in = shift; - my $out = shift; - our $pgdir; - our $adddir; - my $libs; - if (defined $adddir) { - $libs ="$adddir;"; - } else{ - $libs =""; - } - open IN,"<",$in or die "Cannot open $in: $!\n"; - open OUT,">",$out or die "Cannot open $out: $!\n"; - -# $includepath .= ";"; -# $libpath .= ";"; - - while () { - s/\@PGROOT\@/$pgdir/g; - s/\@ADDLIBS\@/$libpath/g; - s/\@ADDLIBS32\@/$libpath32/g; - s/\@PGSRC\@/$pgsrc/g; - s/\@ADDINCLUDE\@/$includepath/g; - - - print OUT $_; - } - close IN; - close OUT; - -} - - - -# my sub -sub AddLibrary -{ - $inc = shift; - if ($libpath ne '') - { - $libpath .= ';'; - } - $libpath .= $inc; - if ($libpath32 ne '') - { - $libpath32 .= ';'; - } - $libpath32 .= $inc; - -} -sub AddLibrary32 -{ - $inc = shift; - if ($libpath32 ne '') - { - $libpath32 .= ';'; - } - $libpath32 .= $inc; - -} -sub AddLibrary64 -{ - $inc = shift; - if ($libpath ne '') - { - $libpath .= ';'; - } - $libpath .= $inc; - -} - -sub AddIncludeDir -{ - # my ($self, $inc) = @_; - $inc = shift; - if ($includepath ne '') - { - $includepath .= ';'; - } - $includepath .= $inc; - -} - -sub AddProject -{ - # my ($self, $name, $type, $folder, $initialdir) = @_; - - if ($config->{zlib}) - { - AddIncludeDir($config->{zlib} . '\include'); - AddLibrary($config->{zlib} . '\lib\zdll.lib'); - } - if ($config->{openssl}) - { - AddIncludeDir($config->{openssl} . '\include'); - if (-e "$config->{openssl}/lib/VC/ssleay32MD.lib") - { - AddLibrary( - $config->{openssl} . '\lib\VC\ssleay32.lib', 1); - AddLibrary( - $config->{openssl} . '\lib\VC\libeay32.lib', 1); - } - else - { - # We don't expect the config-specific library to be here, - # so don't ask for it in last parameter - AddLibrary( - $config->{openssl} . '\lib\ssleay32.lib', 0); - AddLibrary( - $config->{openssl} . '\lib\libeay32.lib', 0); - } - } - if ($config->{nls}) - { - AddIncludeDir($config->{nls} . '\include'); - AddLibrary($config->{nls} . '\lib\libintl.lib'); - } - if ($config->{gss}) - { - AddIncludeDir($config->{gss} . '\inc\krb5'); - AddLibrary($config->{gss} . '\lib\i386\krb5_32.lib'); - AddLibrary($config->{gss} . '\lib\i386\comerr32.lib'); - AddLibrary($config->{gss} . '\lib\i386\gssapi32.lib'); - } - if ($config->{iconv}) - { - AddIncludeDir($config->{iconv} . '\include'); - AddLibrary($config->{iconv} . '\lib\iconv.lib'); - } - if ($config->{icu}) - { - AddIncludeDir($config->{icu} . '\include'); - AddLibrary32($config->{icu} . '\lib\icuin.lib'); - AddLibrary32($config->{icu} . '\lib\icuuc.lib'); - AddLibrary32($config->{icu} . '\lib\icudt.lib'); - AddLibrary64($config->{icu} . '\lib64\icuin.lib'); - AddLibrary64($config->{icu} . '\lib64\icuuc.lib'); - AddLibrary64($config->{icu} . '\lib64\icudt.lib'); - } - if ($config->{xml}) - { - AddIncludeDir($config->{xml} . '\include'); - AddIncludeDir($config->{xml} . '\include\libxml2'); - AddLibrary($config->{xml} . 
'\lib\libxml2.lib');
-	}
-	if ($config->{xslt})
-	{
-		AddIncludeDir($config->{xslt} . '\include');
-		AddLibrary($config->{xslt} . '\lib\libxslt.lib');
-	}
-	if ($config->{libedit})
-	{
-		AddIncludeDir($config->{libedit} . '\include');
-		# AddLibrary($config->{libedit} . "\\" .
-		# ($arch eq 'x64'? 'lib64': 'lib32').'\edit.lib');
-		AddLibrary32($config->{libedit} . '\\lib32\edit.lib');
-		AddLibrary64($config->{libedit} . '\\lib64\edit.lib');
-
-
-	}
-	if ($config->{uuid})
-	{
-		AddIncludeDir($config->{uuid} . '\include');
-		AddLibrary($config->{uuid} . '\lib\uuid.lib');
-	}
-
-	if ($config->{zstd})
-	{
-		AddIncludeDir($config->{zstd});
-		# AddLibrary($config->{zstd}. "\\".($arch eq 'x64'? "zstdlib_x64.lib" : "zstdlib_x86.lib"));
-		AddLibrary32($config->{zstd}. "\\zstdlib_x86.lib");
-		AddLibrary64($config->{zstd}. "\\zstdlib_x64.lib") ;
-	}
-	# return $proj;
-}
-
-
-
-
diff --git a/win32build96.pl b/win32build96.pl
deleted file mode 100644
index c869e485b..000000000
--- a/win32build96.pl
+++ /dev/null
@@ -1,240 +0,0 @@
-#!/usr/bin/perl
-use JSON;
-our $repack_version;
-our $pgdir;
-our $pgsrc;
-if (@ARGV!=2) {
-	print STDERR "Usage $0 postgress-instalation-root pg-source-dir \n";
-	exit 1;
-}
-
-
-our $liblist="";
-
-
-$pgdir = shift @ARGV;
-$pgsrc = shift @ARGV if @ARGV;
-
-
-our $arch = $ENV{'ARCH'} || "x64";
-$arch='Win32' if ($arch eq 'x86' || $arch eq 'X86');
-$arch='x64' if $arch eq 'X64';
-
-$conffile = $pgsrc."/tools/msvc/config.pl";
-
-
-die 'Could not find config.pl'
-	unless (-f $conffile);
-
-our $config;
-do $conffile;
-
-
-if (! -d "$pgdir/bin" || !-d "$pgdir/include" || !-d "$pgdir/lib") {
-	print STDERR "Directory $pgdir doesn't look like root of postgresql installation\n";
-	exit 1;
-}
-our $includepath="";
-our $libpath="";
-our $libpath32="";
-AddProject();
-
-print "\n\n";
-print $libpath."\n";
-print $includepath."\n";
-
-# open F,"<","META.json" or die "Cannot open META.json: $!\n";
-# {
-# 	local $/ = undef;
-# 	$decoded = decode_json(<F>);
-# 	$repack_version= $decoded->{'version'};
-# }
-
-# substitute new path in the project files
-
-
-
-preprocess_project("./msvs/template.pg_probackup96.vcxproj","./msvs/pg_probackup.vcxproj");
-
-exit 0;
-
-
-sub preprocess_project {
-	my $in = shift;
-	my $out = shift;
-	our $pgdir;
-	our $adddir;
-	my $libs;
-	if (defined $adddir) {
-		$libs ="$adddir;";
-	} else{
-		$libs ="";
-	}
-	open IN,"<",$in or die "Cannot open $in: $!\n";
-	open OUT,">",$out or die "Cannot open $out: $!\n";
-
-#	$includepath .= ";";
-#	$libpath .= ";";
-
-	while (<IN>) {
-		s/\@PGROOT\@/$pgdir/g;
-		s/\@ADDLIBS\@/$libpath/g;
-		s/\@ADDLIBS32\@/$libpath32/g;
-		s/\@PGSRC\@/$pgsrc/g;
-		s/\@ADDINCLUDE\@/$includepath/g;
-
-
-		print OUT $_;
-	}
-	close IN;
-	close OUT;
-
-}
-
-
-
-# my sub
-sub AddLibrary
-{
-	$inc = shift;
-	if ($libpath ne '')
-	{
-		$libpath .= ';';
-	}
-	$libpath .= $inc;
-	if ($libpath32 ne '')
-	{
-		$libpath32 .= ';';
-	}
-	$libpath32 .= $inc;
-
-}
-sub AddLibrary32
-{
-	$inc = shift;
-	if ($libpath32 ne '')
-	{
-		$libpath32 .= ';';
-	}
-	$libpath32 .= $inc;
-
-}
-sub AddLibrary64
-{
-	$inc = shift;
-	if ($libpath ne '')
-	{
-		$libpath .= ';';
-	}
-	$libpath .= $inc;
-
-}
-
-sub AddIncludeDir
-{
-	# my ($self, $inc) = @_;
-	$inc = shift;
-	if ($includepath ne '')
-	{
-		$includepath .= ';';
-	}
-	$includepath .= $inc;
-
-}
-
-sub AddProject
-{
-	# my ($self, $name, $type, $folder, $initialdir) = @_;
-
-	if ($config->{zlib})
-	{
-		AddIncludeDir($config->{zlib} . '\include');
-		AddLibrary($config->{zlib} . '\lib\zdll.lib');
-	}
-	if ($config->{openssl})
-	{
-		AddIncludeDir($config->{openssl} . '\include');
-		if (-e "$config->{openssl}/lib/VC/ssleay32MD.lib")
-		{
-			AddLibrary(
-				$config->{openssl} . '\lib\VC\ssleay32.lib', 1);
-			AddLibrary(
-				$config->{openssl} . '\lib\VC\libeay32.lib', 1);
-		}
-		else
-		{
-			# We don't expect the config-specific library to be here,
-			# so don't ask for it in last parameter
-			AddLibrary(
-				$config->{openssl} . '\lib\ssleay32.lib', 0);
-			AddLibrary(
-				$config->{openssl} . '\lib\libeay32.lib', 0);
-		}
-	}
-	if ($config->{nls})
-	{
-		AddIncludeDir($config->{nls} . '\include');
-		AddLibrary($config->{nls} . '\lib\libintl.lib');
-	}
-	if ($config->{gss})
-	{
-		AddIncludeDir($config->{gss} . '\inc\krb5');
-		AddLibrary($config->{gss} . '\lib\i386\krb5_32.lib');
-		AddLibrary($config->{gss} . '\lib\i386\comerr32.lib');
-		AddLibrary($config->{gss} . '\lib\i386\gssapi32.lib');
-	}
-	if ($config->{iconv})
-	{
-		AddIncludeDir($config->{iconv} . '\include');
-		AddLibrary($config->{iconv} . '\lib\iconv.lib');
-	}
-	if ($config->{icu})
-	{
-		AddIncludeDir($config->{icu} . '\include');
-		AddLibrary32($config->{icu} . '\lib\icuin.lib');
-		AddLibrary32($config->{icu} . '\lib\icuuc.lib');
-		AddLibrary32($config->{icu} . '\lib\icudt.lib');
-		AddLibrary64($config->{icu} . '\lib64\icuin.lib');
-		AddLibrary64($config->{icu} . '\lib64\icuuc.lib');
-		AddLibrary64($config->{icu} . '\lib64\icudt.lib');
-	}
-	if ($config->{xml})
-	{
-		AddIncludeDir($config->{xml} . '\include');
-		AddIncludeDir($config->{xml} . '\include\libxml2');
-		AddLibrary($config->{xml} . '\lib\libxml2.lib');
-	}
-	if ($config->{xslt})
-	{
-		AddIncludeDir($config->{xslt} . '\include');
-		AddLibrary($config->{xslt} . '\lib\libxslt.lib');
-	}
-	if ($config->{libedit})
-	{
-		AddIncludeDir($config->{libedit} . '\include');
-		# AddLibrary($config->{libedit} . "\\" .
-		# ($arch eq 'x64'? 'lib64': 'lib32').'\edit.lib');
-		AddLibrary32($config->{libedit} . '\\lib32\edit.lib');
-		AddLibrary64($config->{libedit} . '\\lib64\edit.lib');
-
-
-	}
-	if ($config->{uuid})
-	{
-		AddIncludeDir($config->{uuid} . '\include');
-		AddLibrary($config->{uuid} . '\lib\uuid.lib');
-	}
-
-	if ($config->{zstd})
-	{
-		AddIncludeDir($config->{zstd});
-		# AddLibrary($config->{zstd}. "\\".($arch eq 'x64'? "zstdlib_x64.lib" : "zstdlib_x86.lib"));
-		AddLibrary32($config->{zstd}. "\\zstdlib_x86.lib");
-		AddLibrary64($config->{zstd}. "\\zstdlib_x64.lib") ;
-	}
-	# return $proj;
-}
-
-
-
-
diff --git a/win32build_2.pl b/win32build_2.pl
deleted file mode 100644
index a4f75553c..000000000
--- a/win32build_2.pl
+++ /dev/null
@@ -1,219 +0,0 @@
-#!/usr/bin/perl
-use JSON;
-our $repack_version;
-our $pgdir;
-our $pgsrc;
-if (@ARGV!=2) {
-	print STDERR "Usage $0 postgress-instalation-root pg-source-dir \n";
-	exit 1;
-}
-
-
-our $liblist="";
-
-
-$pgdir = shift @ARGV;
-$pgsrc = shift @ARGV if @ARGV;
-
-
-our $arch = $ENV{'ARCH'} || "x64";
-$arch='Win32' if ($arch eq 'x86' || $arch eq 'X86');
-$arch='x64' if $arch eq 'X64';
-
-$conffile = $pgsrc."/tools/msvc/config.pl";
-
-
-die 'Could not find config.pl'
-	unless (-f $conffile);
-
-our $config;
-do $conffile;
-
-
-if (! -d "$pgdir/bin" || !-d "$pgdir/include" || !-d "$pgdir/lib") {
-	print STDERR "Directory $pgdir doesn't look like root of postgresql installation\n";
-	exit 1;
-}
-our $includepath="";
-our $libpath="";
-AddProject();
-
-print "\n\n";
-print $libpath."\n";
-print $includepath."\n";
-
-# open F,"<","META.json" or die "Cannot open META.json: $!\n";
-# {
-# 	local $/ = undef;
-# 	$decoded = decode_json(<F>);
-# 	$repack_version= $decoded->{'version'};
-# }
-
-# substitute new path in the project files
-
-
-
-preprocess_project("./msvs/template.pg_probackup_2.vcxproj","./msvs/pg_probackup.vcxproj");
-
-exit 0;
-
-
-sub preprocess_project {
-	my $in = shift;
-	my $out = shift;
-	our $pgdir;
-	our $adddir;
-	my $libs;
-	if (defined $adddir) {
-		$libs ="$adddir;";
-	} else{
-		$libs ="";
-	}
-	open IN,"<",$in or die "Cannot open $in: $!\n";
-	open OUT,">",$out or die "Cannot open $out: $!\n";
-
-#	$includepath .= ";";
-#	$libpath .= ";";
-
-	while (<IN>) {
-		s/\@PGROOT\@/$pgdir/g;
-		s/\@ADDLIBS\@/$libpath/g;
-		s/\@PGSRC\@/$pgsrc/g;
-		s/\@ADDINCLUDE\@/$includepath/g;
-
-
-		print OUT $_;
-	}
-	close IN;
-	close OUT;
-
-}
-
-
-
-# my sub
-sub AddLibrary
-{
-	$inc = shift;
-	if ($libpath ne '')
-	{
-		$libpath .= ';';
-	}
-	$libpath .= $inc;
-
-}
-sub AddIncludeDir
-{
-	# my ($self, $inc) = @_;
-	$inc = shift;
-	if ($includepath ne '')
-	{
-		$includepath .= ';';
-	}
-	$includepath .= $inc;
-
-}
-
-sub AddProject
-{
-	# my ($self, $name, $type, $folder, $initialdir) = @_;
-
-	if ($config->{zlib})
-	{
-		AddIncludeDir($config->{zlib} . '\include');
-		AddLibrary($config->{zlib} . '\lib\zdll.lib');
-	}
-	if ($config->{openssl})
-	{
-		AddIncludeDir($config->{openssl} . '\include');
-		if (-e "$config->{openssl}/lib/VC/ssleay32MD.lib")
-		{
-			AddLibrary(
-				$config->{openssl} . '\lib\VC\ssleay32.lib', 1);
-			AddLibrary(
-				$config->{openssl} . '\lib\VC\libeay32.lib', 1);
-		}
-		else
-		{
-			# We don't expect the config-specific library to be here,
-			# so don't ask for it in last parameter
-			AddLibrary(
-				$config->{openssl} . '\lib\ssleay32.lib', 0);
-			AddLibrary(
-				$config->{openssl} . '\lib\libeay32.lib', 0);
-		}
-	}
-	if ($config->{nls})
-	{
-		AddIncludeDir($config->{nls} . '\include');
-		AddLibrary($config->{nls} . '\lib\libintl.lib');
-	}
-	if ($config->{gss})
-	{
-		AddIncludeDir($config->{gss} . '\inc\krb5');
-		AddLibrary($config->{gss} . '\lib\i386\krb5_32.lib');
-		AddLibrary($config->{gss} . '\lib\i386\comerr32.lib');
-		AddLibrary($config->{gss} . '\lib\i386\gssapi32.lib');
-	}
-	if ($config->{iconv})
-	{
-		AddIncludeDir($config->{iconv} . '\include');
-		AddLibrary($config->{iconv} . '\lib\iconv.lib');
-	}
-	if ($config->{icu})
-	{
-		AddIncludeDir($config->{icu} . '\include');
-		if ($arch eq 'Win32')
-		{
-			AddLibrary($config->{icu} . '\lib\icuin.lib');
-			AddLibrary($config->{icu} . '\lib\icuuc.lib');
-			AddLibrary($config->{icu} . '\lib\icudt.lib');
-		}
-		else
-		{
-			AddLibrary($config->{icu} . '\lib64\icuin.lib');
-			AddLibrary($config->{icu} . '\lib64\icuuc.lib');
-			AddLibrary($config->{icu} . '\lib64\icudt.lib');
-		}
-	}
-	if ($config->{xml})
-	{
-		AddIncludeDir($config->{xml} . '\include');
-		AddIncludeDir($config->{xml} . '\include\libxml2');
-		AddLibrary($config->{xml} . '\lib\libxml2.lib');
-	}
-	if ($config->{xslt})
-	{
-		AddIncludeDir($config->{xslt} . '\include');
-		AddLibrary($config->{xslt} . '\lib\libxslt.lib');
-	}
-	if ($config->{libedit})
-	{
-		AddIncludeDir($config->{libedit} . '\include');
-		AddLibrary($config->{libedit} . "\\" .
-			($arch eq 'x64'? 'lib64': 'lib32').'\edit.lib');
-	}
-	if ($config->{uuid})
-	{
-		AddIncludeDir($config->{uuid} . '\include');
-		AddLibrary($config->{uuid} . '\lib\uuid.lib');
-	}
-	if ($config->{libedit})
-	{
-		AddIncludeDir($config->{libedit} . '\include');
-		AddLibrary($config->{libedit} . "\\" .
-			($arch eq 'x64'? 'lib64': 'lib32').'\edit.lib');
-	}
-	if ($config->{zstd})
-	{
-		AddIncludeDir($config->{zstd});
-		AddLibrary($config->{zstd}. "\\".
-			($arch eq 'x64'? "zstdlib_x64.lib" : "zstdlib_x86.lib")
-			);
-	}
-	# return $proj;
-}
-
-
-