Skip to content

Commit 0255f99

Browse files
kou authored and Commitfest Bot committed
Add support for adding custom COPY format
This uses the handler approach, like tablesample. A handler is an internal function that returns an internal struct: in this case, a CopyToRoutine for COPY TO or a CopyFromRoutine for COPY FROM. Whether the command is COPY TO or COPY FROM is passed to the handler as the "is_from" argument: copy_handler(true) returns a CopyFromRoutine, and copy_handler(false) returns a CopyToRoutine. This also adds a test module for custom COPY handlers.
1 parent 63edca6 commit 0255f99

26 files changed

+549
-57
lines changed

src/backend/commands/copy.c

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@
3232
#include "parser/parse_coerce.h"
3333
#include "parser/parse_collate.h"
3434
#include "parser/parse_expr.h"
35+
#include "parser/parse_func.h"
3536
#include "parser/parse_relation.h"
3637
#include "utils/acl.h"
3738
#include "utils/builtins.h"
3839
#include "utils/lsyscache.h"
40+
#include "utils/regproc.h"
3941
#include "utils/rel.h"
4042
#include "utils/rls.h"
4143

@@ -531,10 +533,31 @@ ProcessCopyOptions(ParseState *pstate,
531533
else if (strcmp(fmt, "binary") == 0)
532534
opts_out->binary = true;
533535
else
534-
ereport(ERROR,
535-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
536-
errmsg("COPY format \"%s\" not recognized", fmt),
537-
parser_errposition(pstate, defel->location)));
536+
{
537+
List *qualified_format;
538+
Oid arg_types[1];
539+
Oid handler = InvalidOid;
540+
541+
qualified_format = stringToQualifiedNameList(fmt, NULL);
542+
arg_types[0] = INTERNALOID;
543+
handler = LookupFuncName(qualified_format, 1,
544+
arg_types, true);
545+
if (!OidIsValid(handler))
546+
ereport(ERROR,
547+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
548+
errmsg("COPY format \"%s\" not recognized", fmt),
549+
parser_errposition(pstate, defel->location)));
550+
551+
/* check that handler has correct return type */
552+
if (get_func_rettype(handler) != COPY_HANDLEROID)
553+
ereport(ERROR,
554+
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
555+
errmsg("function %s must return type %s",
556+
fmt, "copy_handler"),
557+
parser_errposition(pstate, defel->location)));
558+
559+
opts_out->handler = handler;
560+
}
538561
}
539562
else if (strcmp(defel->defname, "freeze") == 0)
540563
{

src/backend/commands/copyfrom.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ static void CopyFromBinaryEnd(CopyFromState cstate);
129129

130130
/* text format */
131131
static const CopyFromRoutine CopyFromRoutineText = {
132+
.type = T_CopyFromRoutine,
132133
.CopyFromInFunc = CopyFromTextLikeInFunc,
133134
.CopyFromStart = CopyFromTextLikeStart,
134135
.CopyFromOneRow = CopyFromTextOneRow,
@@ -137,6 +138,7 @@ static const CopyFromRoutine CopyFromRoutineText = {
137138

138139
/* CSV format */
139140
static const CopyFromRoutine CopyFromRoutineCSV = {
141+
.type = T_CopyFromRoutine,
140142
.CopyFromInFunc = CopyFromTextLikeInFunc,
141143
.CopyFromStart = CopyFromTextLikeStart,
142144
.CopyFromOneRow = CopyFromCSVOneRow,
@@ -145,6 +147,7 @@ static const CopyFromRoutine CopyFromRoutineCSV = {
145147

146148
/* binary format */
147149
static const CopyFromRoutine CopyFromRoutineBinary = {
150+
.type = T_CopyFromRoutine,
148151
.CopyFromInFunc = CopyFromBinaryInFunc,
149152
.CopyFromStart = CopyFromBinaryStart,
150153
.CopyFromOneRow = CopyFromBinaryOneRow,
@@ -155,7 +158,22 @@ static const CopyFromRoutine CopyFromRoutineBinary = {
155158
static const CopyFromRoutine *
156159
CopyFromGetRoutine(const CopyFormatOptions *opts)
157160
{
158-
if (opts->csv_mode)
161+
if (OidIsValid(opts->handler))
162+
{
163+
Datum datum;
164+
Node *routine;
165+
166+
datum = OidFunctionCall1(opts->handler, BoolGetDatum(true));
167+
routine = (Node *) DatumGetPointer(datum);
168+
if (routine == NULL || !IsA(routine, CopyFromRoutine))
169+
ereport(ERROR,
170+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
171+
errmsg("COPY handler function %s.%s did not return CopyFromRoutine struct",
172+
get_namespace_name(get_func_namespace(opts->handler)),
173+
get_func_name(opts->handler))));
174+
return castNode(CopyFromRoutine, routine);
175+
}
176+
else if (opts->csv_mode)
159177
return &CopyFromRoutineCSV;
160178
else if (opts->binary)
161179
return &CopyFromRoutineBinary;

src/backend/commands/copyto.c

Lines changed: 19 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -37,56 +37,6 @@
3737
#include "utils/rel.h"
3838
#include "utils/snapmgr.h"
3939

40-
/*
41-
* This struct contains all the state variables used throughout a COPY TO
42-
* operation.
43-
*
44-
* Multi-byte encodings: all supported client-side encodings encode multi-byte
45-
* characters by having the first byte's high bit set. Subsequent bytes of the
46-
* character can have the high bit not set. When scanning data in such an
47-
* encoding to look for a match to a single-byte (ie ASCII) character, we must
48-
* use the full pg_encoding_mblen() machinery to skip over multibyte
49-
* characters, else we might find a false match to a trailing byte. In
50-
* supported server encodings, there is no possibility of a false match, and
51-
* it's faster to make useless comparisons to trailing bytes than it is to
52-
* invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
53-
* when we have to do it the hard way.
54-
*/
55-
typedef struct CopyToStateData
56-
{
57-
/* format-specific routines */
58-
const CopyToRoutine *routine;
59-
60-
/* low-level state data */
61-
CopyDest copy_dest; /* type of copy source/destination */
62-
FILE *copy_file; /* used if copy_dest == COPY_FILE */
63-
StringInfo fe_msgbuf; /* used for all dests during COPY TO */
64-
65-
int file_encoding; /* file or remote side's character encoding */
66-
bool need_transcoding; /* file encoding diff from server? */
67-
bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
68-
69-
/* parameters from the COPY command */
70-
Relation rel; /* relation to copy to */
71-
QueryDesc *queryDesc; /* executable query to copy from */
72-
List *attnumlist; /* integer list of attnums to copy */
73-
char *filename; /* filename, or NULL for STDOUT */
74-
bool is_program; /* is 'filename' a program to popen? */
75-
copy_data_dest_cb data_dest_cb; /* function for writing data */
76-
77-
CopyFormatOptions opts;
78-
Node *whereClause; /* WHERE condition (or NULL) */
79-
80-
/*
81-
* Working state
82-
*/
83-
MemoryContext copycontext; /* per-copy execution context */
84-
85-
FmgrInfo *out_functions; /* lookup info for output functions */
86-
MemoryContext rowcontext; /* per-row evaluation context */
87-
uint64 bytes_processed; /* number of bytes processed so far */
88-
} CopyToStateData;
89-
9040
/* DestReceiver for COPY (query) TO */
9141
typedef struct
9242
{
@@ -140,6 +90,7 @@ static void CopySendInt16(CopyToState cstate, int16 val);
14090

14191
/* text format */
14292
static const CopyToRoutine CopyToRoutineText = {
93+
.type = T_CopyToRoutine,
14394
.CopyToStart = CopyToTextLikeStart,
14495
.CopyToOutFunc = CopyToTextLikeOutFunc,
14596
.CopyToOneRow = CopyToTextOneRow,
@@ -148,6 +99,7 @@ static const CopyToRoutine CopyToRoutineText = {
14899

149100
/* CSV format */
150101
static const CopyToRoutine CopyToRoutineCSV = {
102+
.type = T_CopyToRoutine,
151103
.CopyToStart = CopyToTextLikeStart,
152104
.CopyToOutFunc = CopyToTextLikeOutFunc,
153105
.CopyToOneRow = CopyToCSVOneRow,
@@ -156,6 +108,7 @@ static const CopyToRoutine CopyToRoutineCSV = {
156108

157109
/* binary format */
158110
static const CopyToRoutine CopyToRoutineBinary = {
111+
.type = T_CopyToRoutine,
159112
.CopyToStart = CopyToBinaryStart,
160113
.CopyToOutFunc = CopyToBinaryOutFunc,
161114
.CopyToOneRow = CopyToBinaryOneRow,
@@ -166,7 +119,22 @@ static const CopyToRoutine CopyToRoutineBinary = {
166119
static const CopyToRoutine *
167120
CopyToGetRoutine(const CopyFormatOptions *opts)
168121
{
169-
if (opts->csv_mode)
122+
if (OidIsValid(opts->handler))
123+
{
124+
Datum datum;
125+
Node *routine;
126+
127+
datum = OidFunctionCall1(opts->handler, BoolGetDatum(false));
128+
routine = (Node *) DatumGetPointer(datum);
129+
if (routine == NULL || !IsA(routine, CopyToRoutine))
130+
ereport(ERROR,
131+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
132+
errmsg("COPY handler function %s.%s did not return CopyToRoutine struct",
133+
get_namespace_name(get_func_namespace(opts->handler)),
134+
get_func_name(opts->handler))));
135+
return castNode(CopyToRoutine, routine);
136+
}
137+
else if (opts->csv_mode)
170138
return &CopyToRoutineCSV;
171139
else if (opts->binary)
172140
return &CopyToRoutineBinary;

src/backend/nodes/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ node_headers = \
5050
access/sdir.h \
5151
access/tableam.h \
5252
access/tsmapi.h \
53+
commands/copyapi.h \
5354
commands/event_trigger.h \
5455
commands/trigger.h \
5556
executor/tuptable.h \

src/backend/nodes/gen_node_support.pl

File mode changed: 100644 → 100755
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ sub elem
6262
access/sdir.h
6363
access/tableam.h
6464
access/tsmapi.h
65+
commands/copyapi.h
6566
commands/event_trigger.h
6667
commands/trigger.h
6768
executor/tuptable.h
@@ -86,6 +87,7 @@ sub elem
8687
access/sdir.h
8788
access/tableam.h
8889
access/tsmapi.h
90+
commands/copyapi.h
8991
commands/event_trigger.h
9092
commands/trigger.h
9193
executor/tuptable.h

src/backend/utils/adt/pseudotypes.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ PSEUDOTYPE_DUMMY_IO_FUNCS(fdw_handler);
370370
PSEUDOTYPE_DUMMY_IO_FUNCS(table_am_handler);
371371
PSEUDOTYPE_DUMMY_IO_FUNCS(index_am_handler);
372372
PSEUDOTYPE_DUMMY_IO_FUNCS(tsm_handler);
373+
PSEUDOTYPE_DUMMY_IO_FUNCS(copy_handler);
373374
PSEUDOTYPE_DUMMY_IO_FUNCS(internal);
374375
PSEUDOTYPE_DUMMY_IO_FUNCS(anyelement);
375376
PSEUDOTYPE_DUMMY_IO_FUNCS(anynonarray);

src/include/catalog/pg_proc.dat

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7852,6 +7852,12 @@
78527852
{ oid => '3312', descr => 'I/O',
78537853
proname => 'tsm_handler_out', prorettype => 'cstring',
78547854
proargtypes => 'tsm_handler', prosrc => 'tsm_handler_out' },
7855+
{ oid => '8753', descr => 'I/O',
7856+
proname => 'copy_handler_in', proisstrict => 'f', prorettype => 'copy_handler',
7857+
proargtypes => 'cstring', prosrc => 'copy_handler_in' },
7858+
{ oid => '8754', descr => 'I/O',
7859+
proname => 'copy_handler_out', prorettype => 'cstring',
7860+
proargtypes => 'copy_handler', prosrc => 'copy_handler_out' },
78557861
{ oid => '267', descr => 'I/O',
78567862
proname => 'table_am_handler_in', proisstrict => 'f',
78577863
prorettype => 'table_am_handler', proargtypes => 'cstring',

src/include/catalog/pg_type.dat

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,12 @@
633633
typcategory => 'P', typinput => 'tsm_handler_in',
634634
typoutput => 'tsm_handler_out', typreceive => '-', typsend => '-',
635635
typalign => 'i' },
636+
{ oid => '8752',
637+
descr => 'pseudo-type for the result of a COPY TO/FROM handler function',
638+
typname => 'copy_handler', typlen => '4', typbyval => 't', typtype => 'p',
639+
typcategory => 'P', typinput => 'copy_handler_in',
640+
typoutput => 'copy_handler_out', typreceive => '-', typsend => '-',
641+
typalign => 'i' },
636642
{ oid => '269',
637643
descr => 'pseudo-type for the result of a table AM handler function',
638644
typname => 'table_am_handler', typlen => '4', typbyval => 't', typtype => 'p',

src/include/commands/copy.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,10 @@ typedef struct CopyFormatOptions
8787
CopyLogVerbosityChoice log_verbosity; /* verbosity of logged messages */
8888
int64 reject_limit; /* maximum tolerable number of errors */
8989
List *convert_select; /* list of column names (can be NIL) */
90+
Oid handler; /* handler function for custom format routine */
9091
} CopyFormatOptions;
9192

92-
/* These are private in commands/copy[from|to].c */
93+
/* These are private in commands/copy[from|to]_internal.h */
9394
typedef struct CopyFromStateData *CopyFromState;
9495
typedef struct CopyToStateData *CopyToState;
9596

src/include/commands/copyapi.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
*/
2323
typedef struct CopyToRoutine
2424
{
25+
NodeTag type;
26+
2527
/*
2628
* Set output function information. This callback is called once at the
2729
* beginning of COPY TO.
@@ -60,6 +62,8 @@ typedef struct CopyToRoutine
6062
*/
6163
typedef struct CopyFromRoutine
6264
{
65+
NodeTag type;
66+
6367
/*
6468
* Set input function information. This callback is called once at the
6569
* beginning of COPY FROM.

src/include/commands/copyto_internal.h

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@
1414
#ifndef COPYTO_INTERNAL_H
1515
#define COPYTO_INTERNAL_H
1616

17+
#include "commands/copy.h"
18+
#include "executor/execdesc.h"
19+
#include "executor/tuptable.h"
20+
#include "nodes/execnodes.h"
21+
1722
/*
1823
* Represents the different dest cases we need to worry about at
1924
* the bottom level
@@ -25,4 +30,54 @@ typedef enum CopyDest
2530
COPY_DEST_CALLBACK, /* to callback function */
2631
} CopyDest;
2732

33+
/*
34+
* This struct contains all the state variables used throughout a COPY TO
35+
* operation.
36+
*
37+
* Multi-byte encodings: all supported client-side encodings encode multi-byte
38+
* characters by having the first byte's high bit set. Subsequent bytes of the
39+
* character can have the high bit not set. When scanning data in such an
40+
* encoding to look for a match to a single-byte (ie ASCII) character, we must
41+
* use the full pg_encoding_mblen() machinery to skip over multibyte
42+
* characters, else we might find a false match to a trailing byte. In
43+
* supported server encodings, there is no possibility of a false match, and
44+
* it's faster to make useless comparisons to trailing bytes than it is to
45+
* invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
46+
* when we have to do it the hard way.
47+
*/
48+
typedef struct CopyToStateData
49+
{
50+
/* format-specific routines */
51+
const CopyToRoutine *routine;
52+
53+
/* low-level state data */
54+
CopyDest copy_dest; /* type of copy source/destination */
55+
FILE *copy_file; /* used if copy_dest == COPY_FILE */
56+
StringInfo fe_msgbuf; /* used for all dests during COPY TO */
57+
58+
int file_encoding; /* file or remote side's character encoding */
59+
bool need_transcoding; /* file encoding diff from server? */
60+
bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
61+
62+
/* parameters from the COPY command */
63+
Relation rel; /* relation to copy to */
64+
QueryDesc *queryDesc; /* executable query to copy from */
65+
List *attnumlist; /* integer list of attnums to copy */
66+
char *filename; /* filename, or NULL for STDOUT */
67+
bool is_program; /* is 'filename' a program to popen? */
68+
copy_data_dest_cb data_dest_cb; /* function for writing data */
69+
70+
CopyFormatOptions opts;
71+
Node *whereClause; /* WHERE condition (or NULL) */
72+
73+
/*
74+
* Working state
75+
*/
76+
MemoryContext copycontext; /* per-copy execution context */
77+
78+
FmgrInfo *out_functions; /* lookup info for output functions */
79+
MemoryContext rowcontext; /* per-row evaluation context */
80+
uint64 bytes_processed; /* number of bytes processed so far */
81+
} CopyToStateData;
82+
2883
#endif /* COPYTO_INTERNAL_H */

src/include/nodes/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ node_support_input_i = [
1212
'access/sdir.h',
1313
'access/tableam.h',
1414
'access/tsmapi.h',
15+
'commands/copyapi.h',
1516
'commands/event_trigger.h',
1617
'commands/trigger.h',
1718
'executor/tuptable.h',

src/test/modules/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ SUBDIRS = \
1616
spgist_name_ops \
1717
test_bloomfilter \
1818
test_copy_callbacks \
19+
test_copy_format \
1920
test_custom_rmgrs \
2021
test_ddl_deparse \
2122
test_dsa \

src/test/modules/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ subdir('spgist_name_ops')
1515
subdir('ssl_passphrase_callback')
1616
subdir('test_bloomfilter')
1717
subdir('test_copy_callbacks')
18+
subdir('test_copy_format')
1819
subdir('test_custom_rmgrs')
1920
subdir('test_ddl_deparse')
2021
subdir('test_dsa')
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Generated subdirectories
2+
/log/
3+
/results/
4+
/tmp_check/

0 commit comments

Comments
 (0)