Add progress reporting of skipped tuples during COPY FROM.
author Masahiko Sawada <[email protected]>
Thu, 25 Jan 2024 01:57:41 +0000 (10:57 +0900)
committer Masahiko Sawada <[email protected]>
Thu, 25 Jan 2024 01:57:41 +0000 (10:57 +0900)
Commit 9e2d870119 enabled the COPY command to skip malformed data;
however, there was no visibility into how many tuples were actually
skipped during COPY FROM.

This commit adds a new "tuples_skipped" column to the
pg_stat_progress_copy view to report the number of tuples that were
skipped because they contain malformed data.

Bump catalog version.

Author: Atsushi Torikoshi
Reviewed-by: Masahiko Sawada
Discussion: https://postgr.es/m/d12fd8c99adcae2744212cb23feff6ed%40oss.nttdata.com

doc/src/sgml/monitoring.sgml
src/backend/catalog/system_views.sql
src/backend/commands/copyfrom.c
src/include/catalog/catversion.h
src/include/commands/progress.h
src/test/regress/expected/rules.out

index 6e74138a69aeca0f1d556fd8ea79246e3728a197..d9b8b37585c15da9ab87191a51ed9a5d1240a010 100644 (file)
@@ -5780,6 +5780,18 @@ FROM pg_stat_get_backend_idset() AS backendid;
        <command>WHERE</command> clause of the <command>COPY</command> command.
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>tuples_skipped</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of tuples skipped because they contain malformed data.
+       This counter only advances when a value other than
+       <literal>stop</literal> is specified to the <literal>ON_ERROR</literal>
+       option.
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
index e43e36f5ac68dab06be0f016c3f59d51a797c95c..6288270e2b2bf6d8281fe2c625202b7c44a0c02b 100644 (file)
@@ -1318,7 +1318,8 @@ CREATE VIEW pg_stat_progress_copy AS
         S.param1 AS bytes_processed,
         S.param2 AS bytes_total,
         S.param3 AS tuples_processed,
-        S.param4 AS tuples_excluded
+        S.param4 AS tuples_excluded,
+        S.param7 AS tuples_skipped
     FROM pg_stat_get_progress_info('COPY') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
index 173a736ad5231710e353b55ccb46e1e009e1031e..1fe70b913382760c52097557b9a3e4e4b9d773f4 100644 (file)
@@ -650,6 +650,7 @@ CopyFrom(CopyFromState cstate)
    CopyMultiInsertInfo multiInsertInfo = {0};  /* pacify compiler */
    int64       processed = 0;
    int64       excluded = 0;
+   int64       skipped = 0;
    bool        has_before_insert_row_trig;
    bool        has_instead_insert_row_trig;
    bool        leafpart_use_multi_insert = false;
@@ -1012,6 +1013,10 @@ CopyFrom(CopyFromState cstate)
                 */
                cstate->escontext->error_occurred = false;
 
+           /* Report that this tuple was skipped by the ON_ERROR clause */
+           pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
+                                        ++skipped);
+
            continue;
        }
 
index 43d83672a6fa86b2b06b1efee1f634559b9ab0d3..23944db9e6b1f055235a88a9c815e5a957f74ff3 100644 (file)
@@ -57,6 +57,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 202401241
+#define CATALOG_VERSION_NO 202401251
 
 #endif
index a458c8c50a67cb263ae764fec7fb1f6dc6759362..73afa77a9c7af5cadeabee6bc7157df9d357e67f 100644 (file)
 #define PROGRESS_COPY_TUPLES_EXCLUDED 3
 #define PROGRESS_COPY_COMMAND 4
 #define PROGRESS_COPY_TYPE 5
+#define PROGRESS_COPY_TUPLES_SKIPPED 6
 
 /* Commands of COPY (as advertised via PROGRESS_COPY_COMMAND) */
 #define PROGRESS_COPY_COMMAND_FROM 1
index 55f2e95352fca6c656194ed04a4b098dd7e9327e..5e846b01e68f5a872435fb9f2278a249b8fe4867 100644 (file)
@@ -1988,7 +1988,8 @@ pg_stat_progress_copy| SELECT s.pid,
     s.param1 AS bytes_processed,
     s.param2 AS bytes_total,
     s.param3 AS tuples_processed,
-    s.param4 AS tuples_excluded
+    s.param4 AS tuples_excluded,
+    s.param7 AS tuples_skipped
    FROM (pg_stat_get_progress_info('COPY'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)));
 pg_stat_progress_create_index| SELECT s.pid,