Support COPY TO for partitioned tables.
authorMasahiko Sawada <[email protected]>
Mon, 20 Oct 2025 17:38:52 +0000 (10:38 -0700)
committerMasahiko Sawada <[email protected]>
Mon, 20 Oct 2025 17:38:52 +0000 (10:38 -0700)
Previously, COPY TO command didn't support directly specifying
partitioned tables so users had to use COPY (SELECT ...) TO variant.

This commit adds direct COPY TO support for partitioned
tables, improving both usability and performance. Performance tests
show it's faster than the COPY (SELECT ...) TO variant as it avoids
the overheads of query processing and sending results to the COPY TO
command.

When used with partitioned tables, COPY TO copies the same rows as
SELECT * FROM table. Row-level security policies of the partitioned
table are applied in the same way as when executing COPY TO on a plain
table.

Author: jian he <[email protected]>
Reviewed-by: vignesh C <[email protected]>
Reviewed-by: David Rowley <[email protected]>
Reviewed-by: Melih Mutlu <[email protected]>
Reviewed-by: Kirill Reshke <[email protected]>
Reviewed-by: Atsushi Torikoshi <[email protected]>
Reviewed-by: Álvaro Herrera <[email protected]>
Reviewed-by: Masahiko Sawada <[email protected]>
Reviewed-by: Chao Li <[email protected]>
Discussion: https://postgr.es/m/CACJufxEZt%2BG19Ors3bQUq-42-61__C%3Dy5k2wk%3DsHEFRusu7%3DiQ%40mail.gmail.com

contrib/postgres_fdw/expected/postgres_fdw.out
contrib/postgres_fdw/sql/postgres_fdw.sql
doc/src/sgml/ref/copy.sgml
src/backend/commands/copy.c
src/backend/commands/copyto.c
src/test/regress/expected/copy.out
src/test/regress/expected/rowsecurity.out
src/test/regress/sql/copy.sql
src/test/regress/sql/rowsecurity.sql

index 91bbd0d8c7347eefc9ec0b3c0ecb6fae95564763..cd28126049df7b1b876d3fa2ca63fd3241aa8f16 100644 (file)
@@ -11599,6 +11599,11 @@ SELECT * FROM result_tbl ORDER BY a;
 (3 rows)
 
 DELETE FROM result_tbl;
+-- Test COPY TO when foreign table is partition
+COPY async_pt TO stdout; --error
+ERROR:  cannot copy from foreign table "async_p1"
+DETAIL:  Partition "async_p1" is a foreign table in partitioned table "async_pt"
+HINT:  Try the COPY (SELECT ...) TO variant.
 DROP FOREIGN TABLE async_p3;
 DROP TABLE base_tbl3;
 -- Check case where the partitioned table has local/remote partitions
index 3b7da1285192c0dc424cefee8f391bbe3c56afbc..9a8f9e28135397e3833fb8d2cd48dd735a11c896 100644 (file)
@@ -3941,6 +3941,9 @@ INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505;
 SELECT * FROM result_tbl ORDER BY a;
 DELETE FROM result_tbl;
 
+-- Test COPY TO when foreign table is partition
+COPY async_pt TO stdout; --error
+
 DROP FOREIGN TABLE async_p3;
 DROP TABLE base_tbl3;
 
index c2d1fbc1fbe942c24881a1679905c375589252c9..fdc24b36bb8efa7728cae064be001cc5b9a5a838 100644 (file)
@@ -539,13 +539,14 @@ COPY <replaceable class="parameter">count</replaceable>
 
    <para>
     <command>COPY TO</command> can be used with plain
-    tables and populated materialized views.
-    For example,
-    <literal>COPY <replaceable class="parameter">table</replaceable>
-    TO</literal> copies the same rows as
+    tables, populated materialized views, and partitioned tables.
+    For non-partitioned tables, COPY <replaceable class="parameter">table</replaceable>
+    copies the same rows as
     <literal>SELECT * FROM ONLY <replaceable class="parameter">table</replaceable></literal>.
+    For partitioned tables, it copies the same rows as
+    <literal>SELECT * FROM <replaceable class="parameter">table</replaceable></literal>.
     However it doesn't directly support other relation types,
-    such as partitioned tables, inheritance child tables, or views.
+    such as inheritance child tables, or views.
     To copy all rows from such relations, use <literal>COPY (SELECT * FROM
     <replaceable class="parameter">table</replaceable>) TO</literal>.
    </para>
index fae9c41db6565cafe058f2c9a6889c92eae61a8e..44020d0ae804a7ceaa2b96a533960010ad164e4e 100644 (file)
@@ -251,11 +251,15 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
             * relation which we have opened and locked.  Use "ONLY" so that
             * COPY retrieves rows from only the target table not any
             * inheritance children, the same as when RLS doesn't apply.
+            *
+            * However, when copying data from a partitioned table, we don't
+            * use "ONLY", since we need to retrieve rows from its descendant
+            * tables too.
             */
            from = makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
                                pstrdup(RelationGetRelationName(rel)),
                                -1);
-           from->inh = false;  /* apply ONLY */
+           from->inh = (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
 
            /* Build query */
            select = makeNode(SelectStmt);
index e5781155cdfa8756458945986e0beced03cb6add..a1919c6db43d9cb9c2fd57630b0db73140cb1bae 100644 (file)
@@ -18,7 +18,9 @@
 #include <unistd.h>
 #include <sys/stat.h>
 
+#include "access/table.h"
 #include "access/tableam.h"
+#include "catalog/pg_inherits.h"
 #include "commands/copyapi.h"
 #include "commands/progress.h"
 #include "executor/execdesc.h"
@@ -86,6 +88,7 @@ typedef struct CopyToStateData
 
    CopyFormatOptions opts;
    Node       *whereClause;    /* WHERE condition (or NULL) */
+   List       *partitions;     /* OID list of partitions to copy data from */
 
    /*
     * Working state
@@ -116,6 +119,8 @@ static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot);
 static void CopyAttributeOutText(CopyToState cstate, const char *string);
 static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
                                bool use_quote);
+static void CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel,
+                          uint64 *processed);
 
 /* built-in format-specific routines */
 static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
@@ -602,6 +607,10 @@ EndCopy(CopyToState cstate)
    pgstat_progress_end_command();
 
    MemoryContextDelete(cstate->copycontext);
+
+   if (cstate->partitions)
+       list_free(cstate->partitions);
+
    pfree(cstate);
 }
 
@@ -643,6 +652,7 @@ BeginCopyTo(ParseState *pstate,
        PROGRESS_COPY_COMMAND_TO,
        0
    };
+   List       *children = NIL;
 
    if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
    {
@@ -673,11 +683,34 @@ BeginCopyTo(ParseState *pstate,
                     errmsg("cannot copy from sequence \"%s\"",
                            RelationGetRelationName(rel))));
        else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-           ereport(ERROR,
-                   (errcode(ERRCODE_WRONG_OBJECT_TYPE),
-                    errmsg("cannot copy from partitioned table \"%s\"",
-                           RelationGetRelationName(rel)),
-                    errhint("Try the COPY (SELECT ...) TO variant.")));
+       {
+           /*
+            * Collect OIDs of relation containing data, so that later
+            * DoCopyTo can copy the data from them.
+            */
+           children = find_all_inheritors(RelationGetRelid(rel), AccessShareLock, NULL);
+
+           foreach_oid(child, children)
+           {
+               char        relkind = get_rel_relkind(child);
+
+               if (relkind == RELKIND_FOREIGN_TABLE)
+               {
+                   char       *relation_name = get_rel_name(child);
+
+                   ereport(ERROR,
+                           errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                           errmsg("cannot copy from foreign table \"%s\"", relation_name),
+                           errdetail("Partition \"%s\" is a foreign table in partitioned table \"%s\"",
+                                     relation_name, RelationGetRelationName(rel)),
+                           errhint("Try the COPY (SELECT ...) TO variant."));
+               }
+
+               /* Exclude tables with no data */
+               if (RELKIND_HAS_PARTITIONS(relkind))
+                   children = foreach_delete_current(children, child);
+           }
+       }
        else
            ereport(ERROR,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -713,6 +746,7 @@ BeginCopyTo(ParseState *pstate,
        cstate->rel = rel;
 
        tupDesc = RelationGetDescr(cstate->rel);
+       cstate->partitions = children;
    }
    else
    {
@@ -722,6 +756,7 @@ BeginCopyTo(ParseState *pstate,
        DestReceiver *dest;
 
        cstate->rel = NULL;
+       cstate->partitions = NIL;
 
        /*
         * Run parse analysis and rewrite.  Note this also acquires sufficient
@@ -1030,7 +1065,7 @@ DoCopyTo(CopyToState cstate)
    TupleDesc   tupDesc;
    int         num_phys_attrs;
    ListCell   *cur;
-   uint64      processed;
+   uint64      processed = 0;
 
    if (fe_copy)
        SendCopyBegin(cstate);
@@ -1070,33 +1105,24 @@ DoCopyTo(CopyToState cstate)
 
    if (cstate->rel)
    {
-       TupleTableSlot *slot;
-       TableScanDesc scandesc;
-
-       scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
-       slot = table_slot_create(cstate->rel, NULL);
-
-       processed = 0;
-       while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
+       /*
+        * If COPY TO source table is a partitioned table, then open each
+        * partition and process each individual partition.
+        */
+       if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
        {
-           CHECK_FOR_INTERRUPTS();
-
-           /* Deconstruct the tuple ... */
-           slot_getallattrs(slot);
-
-           /* Format and send the data */
-           CopyOneRowTo(cstate, slot);
+           foreach_oid(child, cstate->partitions)
+           {
+               Relation    scan_rel;
 
-           /*
-            * Increment the number of processed tuples, and report the
-            * progress.
-            */
-           pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
-                                        ++processed);
+               /* We already got the lock in BeginCopyTo */
+               scan_rel = table_open(child, NoLock);
+               CopyRelationTo(cstate, scan_rel, cstate->rel, &processed);
+               table_close(scan_rel, NoLock);
+           }
        }
-
-       ExecDropSingleTupleTableSlot(slot);
-       table_endscan(scandesc);
+       else
+           CopyRelationTo(cstate, cstate->rel, NULL, &processed);
    }
    else
    {
@@ -1115,6 +1141,73 @@ DoCopyTo(CopyToState cstate)
    return processed;
 }
 
+/*
+ * Scans a single table and exports its rows to the COPY destination.
+ *
+ * root_rel can be set to the root table of rel if rel is a partition
+ * table so that we can send tuples in root_rel's rowtype, which might
+ * differ from individual partitions.
+*/
+static void
+CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel, uint64 *processed)
+{
+   TupleTableSlot *slot;
+   TableScanDesc scandesc;
+   AttrMap    *map = NULL;
+   TupleTableSlot *root_slot = NULL;
+
+   scandesc = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
+   slot = table_slot_create(rel, NULL);
+
+   /*
+    * If we are exporting partition data here, we check if converting tuples
+    * to the root table's rowtype, because a partition might have column
+    * order different than its root table.
+    */
+   if (root_rel != NULL)
+   {
+       root_slot = table_slot_create(root_rel, NULL);
+       map = build_attrmap_by_name_if_req(RelationGetDescr(root_rel),
+                                          RelationGetDescr(rel),
+                                          false);
+   }
+
+   while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
+   {
+       TupleTableSlot *copyslot;
+
+       CHECK_FOR_INTERRUPTS();
+
+       if (map != NULL)
+           copyslot = execute_attr_map_slot(map, slot, root_slot);
+       else
+       {
+           /* Deconstruct the tuple */
+           slot_getallattrs(slot);
+           copyslot = slot;
+       }
+
+       /* Format and send the data */
+       CopyOneRowTo(cstate, copyslot);
+
+       /*
+        * Increment the number of processed tuples, and report the progress.
+        */
+       pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
+                                    ++(*processed));
+   }
+
+   ExecDropSingleTupleTableSlot(slot);
+
+   if (root_slot != NULL)
+       ExecDropSingleTupleTableSlot(root_slot);
+
+   if (map != NULL)
+       free_attrmap(map);
+
+   table_endscan(scandesc);
+}
+
 /*
  * Emit one row during DoCopyTo().
  */
index ac66eb55aeed4a6a864c0d6f826fde7f0a38c57e..24e0f472f14ad3cd92c5536a80050b0bce016142 100644 (file)
@@ -373,3 +373,23 @@ COPY copytest_mv(id) TO stdout WITH (header);
 id
 1
 DROP MATERIALIZED VIEW copytest_mv;
+-- Tests for COPY TO with partitioned tables.
+-- The child table pp_2 has a different column order than the root table pp.
+-- Check if COPY TO exports tuples as the root table's column order.
+CREATE TABLE pp (id int,val int) PARTITION BY RANGE (id);
+CREATE TABLE pp_1 (val int, id int) PARTITION BY RANGE (id);
+CREATE TABLE pp_2 (id int, val int) PARTITION BY RANGE (id);
+ALTER TABLE pp ATTACH PARTITION pp_1 FOR VALUES FROM (1) TO (5);
+ALTER TABLE pp ATTACH PARTITION pp_2 FOR VALUES FROM (5) TO (10);
+CREATE TABLE pp_15 PARTITION OF pp_1 FOR VALUES FROM (1) TO (5);
+CREATE TABLE pp_510 PARTITION OF pp_2 FOR VALUES FROM (5) TO (10);
+INSERT INTO pp SELECT g, 10 + g FROM generate_series(1,6) g;
+COPY pp TO stdout(header);
+id val
+1  11
+2  12
+3  13
+4  14
+5  15
+6  16
+DROP TABLE PP;
index 5a172c5d91c8e0004a65575cc18f0654761700e7..42b78a2460302e2cf5377b15d17b2b5b6e7af371 100644 (file)
@@ -986,6 +986,11 @@ NOTICE:  f_leak => my first satire
    9 |  11 |      1 | regress_rls_dave  | awesome science fiction
 (4 rows)
 
+COPY part_document TO stdout WITH (DELIMITER ',');
+1,11,1,regress_rls_bob,my first novel
+6,11,1,regress_rls_carol,great science fiction
+9,11,1,regress_rls_dave,awesome science fiction
+4,55,1,regress_rls_bob,my first satire
 EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
                                QUERY PLAN                                
 -------------------------------------------------------------------------
@@ -1028,6 +1033,17 @@ NOTICE:  f_leak => awesome technology book
   10 |  99 |      2 | regress_rls_dave  | awesome technology book
 (10 rows)
 
+COPY part_document TO stdout WITH (DELIMITER ',');
+1,11,1,regress_rls_bob,my first novel
+2,11,2,regress_rls_bob,my second novel
+6,11,1,regress_rls_carol,great science fiction
+9,11,1,regress_rls_dave,awesome science fiction
+4,55,1,regress_rls_bob,my first satire
+8,55,2,regress_rls_carol,great satire
+3,99,2,regress_rls_bob,my science textbook
+5,99,2,regress_rls_bob,my history book
+7,99,2,regress_rls_carol,great technology book
+10,99,2,regress_rls_dave,awesome technology book
 EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
                                QUERY PLAN                                
 -------------------------------------------------------------------------
@@ -1058,6 +1074,11 @@ NOTICE:  f_leak => awesome science fiction
    9 |  11 |      1 | regress_rls_dave  | awesome science fiction
 (4 rows)
 
+COPY part_document TO stdout WITH (DELIMITER ',');
+1,11,1,regress_rls_bob,my first novel
+2,11,2,regress_rls_bob,my second novel
+6,11,1,regress_rls_carol,great science fiction
+9,11,1,regress_rls_dave,awesome science fiction
 EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
index a1316c73bac6956e5649aeb89674734212876186..676a8b342b566f3f98dc649b1b26d1b2d373c226 100644 (file)
@@ -405,3 +405,17 @@ COPY copytest_mv(id) TO stdout WITH (header);
 REFRESH MATERIALIZED VIEW copytest_mv;
 COPY copytest_mv(id) TO stdout WITH (header);
 DROP MATERIALIZED VIEW copytest_mv;
+
+-- Tests for COPY TO with partitioned tables.
+-- The child table pp_2 has a different column order than the root table pp.
+-- Check if COPY TO exports tuples as the root table's column order.
+CREATE TABLE pp (id int,val int) PARTITION BY RANGE (id);
+CREATE TABLE pp_1 (val int, id int) PARTITION BY RANGE (id);
+CREATE TABLE pp_2 (id int, val int) PARTITION BY RANGE (id);
+ALTER TABLE pp ATTACH PARTITION pp_1 FOR VALUES FROM (1) TO (5);
+ALTER TABLE pp ATTACH PARTITION pp_2 FOR VALUES FROM (5) TO (10);
+CREATE TABLE pp_15 PARTITION OF pp_1 FOR VALUES FROM (1) TO (5);
+CREATE TABLE pp_510 PARTITION OF pp_2 FOR VALUES FROM (5) TO (10);
+INSERT INTO pp SELECT g, 10 + g FROM generate_series(1,6) g;
+COPY pp TO stdout(header);
+DROP TABLE PP;
index 21ac0ca51eed1bac62ec3fd15e0ebc7bf00183ab..2d1be543391a72a1551f19e705f0919a666f7dd1 100644 (file)
@@ -362,16 +362,19 @@ SELECT * FROM pg_policies WHERE schemaname = 'regress_rls_schema' AND tablename
 SET SESSION AUTHORIZATION regress_rls_bob;
 SET row_security TO ON;
 SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did;
+COPY part_document TO stdout WITH (DELIMITER ',');
 EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
 
 -- viewpoint from regress_rls_carol
 SET SESSION AUTHORIZATION regress_rls_carol;
 SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did;
+COPY part_document TO stdout WITH (DELIMITER ',');
 EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
 
 -- viewpoint from regress_rls_dave
 SET SESSION AUTHORIZATION regress_rls_dave;
 SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did;
+COPY part_document TO stdout WITH (DELIMITER ',');
 EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle);
 
 -- pp1 ERROR