diff options
author | Michael Paquier | 2024-12-16 02:23:38 +0000 |
---|---|---|
committer | Michael Paquier | 2024-12-16 02:23:38 +0000 |
commit | 3ad8b840ce8b1d7279f2d0d5fb7d346c0a6a3e8d (patch) | |
tree | 440fe4131047cfff9b570789e94e5688a3041a12 | |
parent | bf9165bb0c5cea71e1a9cfa0c584c4d176f6c36f (diff) |
Add some tests for encoding conversion in COPY TO/FROM
This adds a couple of tests to trigger encoding conversion when input
and server encodings do not match in COPY FROM/TO, or need_transcoding
set to true in the COPY state data. These tests rely on UTF8 <-> LATIN1
for the valid cases as LATIN1 accepts any bytes, and UTF8 <-> EUC_JP for
some of the invalid cases where a character cannot be understood,
causing a conversion failure.
Both ENCODING and client_encoding are covered. Test suggested by Andres
Freund.
Author: Sutou Kouhei
Discussion: https://postgr.es/m/[email protected]
-rw-r--r-- | src/test/regress/expected/copyencoding.out | 46 | ||||
-rw-r--r-- | src/test/regress/expected/copyencoding_1.out | 8 | ||||
-rw-r--r-- | src/test/regress/parallel_schedule | 2 | ||||
-rw-r--r-- | src/test/regress/sql/copyencoding.sql | 53 |
4 files changed, 108 insertions, 1 deletions
diff --git a/src/test/regress/expected/copyencoding.out b/src/test/regress/expected/copyencoding.out
new file mode 100644
index 00000000000..cfa2ed6df00
--- /dev/null
+++ b/src/test/regress/expected/copyencoding.out
@@ -0,0 +1,46 @@
+--
+-- Test cases for encoding with COPY commands
+--
+-- skip test if not UTF8 server encoding
+SELECT getdatabaseencoding() <> 'UTF8'
+       AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+-- directory paths are passed to us in environment variables
+\getenv abs_builddir PG_ABS_BUILDDIR
+\set utf8_csv :abs_builddir '/results/copyencoding_utf8.csv'
+CREATE TABLE copy_encoding_tab (t text);
+-- Valid cases
+-- Use ENCODING option
+-- U+3042 HIRAGANA LETTER A
+COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
+-- Read UTF8 data as LATIN1: no error
+COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'LATIN1');
+-- Use client_encoding
+SET client_encoding TO UTF8;
+-- U+3042 HIRAGANA LETTER A
+COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
+-- Read UTF8 data as LATIN1: no error
+SET client_encoding TO LATIN1;
+COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
+RESET client_encoding;
+-- Invalid cases
+-- Use ENCODING explicitly
+-- U+3042 HIRAGANA LETTER A
+COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
+-- Read UTF8 data as EUC_JP: error, invalid byte sequence
+COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'EUC_JP');
+ERROR:  invalid byte sequence for encoding "EUC_JP": 0xe3 0x81
+CONTEXT:  COPY copy_encoding_tab, line 1
+-- Use client_encoding
+SET client_encoding TO UTF8;
+-- U+3042 HIRAGANA LETTER A
+COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
+-- Read UTF8 data as EUC_JP: error, invalid byte sequence
+SET client_encoding TO EUC_JP;
+COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
+ERROR:  invalid byte sequence for encoding "EUC_JP": 0xe3 0x81
+CONTEXT:  COPY copy_encoding_tab, line 1
+RESET client_encoding;
+DROP TABLE copy_encoding_tab;
diff --git a/src/test/regress/expected/copyencoding_1.out b/src/test/regress/expected/copyencoding_1.out
new file mode 100644
index 00000000000..a85ee2dbd18
--- /dev/null
+++ b/src/test/regress/expected/copyencoding_1.out
@@ -0,0 +1,8 @@
+--
+-- Test cases for encoding with COPY commands
+--
+-- skip test if not UTF8 server encoding
+SELECT getdatabaseencoding() <> 'UTF8'
+       AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 81e4222d26a..1edd9e45ebb 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -36,7 +36,7 @@ test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comment
 # execute two copy tests in parallel, to check that copy itself
 # is concurrent safe.
 # ----------
-test: copy copyselect copydml insert insert_conflict
+test: copy copyselect copydml copyencoding insert insert_conflict
 
 # ----------
 # More groups of parallel tests
diff --git a/src/test/regress/sql/copyencoding.sql b/src/test/regress/sql/copyencoding.sql
new file mode 100644
index 00000000000..4e96a4d6505
--- /dev/null
+++ b/src/test/regress/sql/copyencoding.sql
@@ -0,0 +1,53 @@
+--
+-- Test cases for encoding with COPY commands
+--
+
+-- skip test if not UTF8 server encoding
+SELECT getdatabaseencoding() <> 'UTF8'
+       AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+-- directory paths are passed to us in environment variables
+\getenv abs_builddir PG_ABS_BUILDDIR
+
+\set utf8_csv :abs_builddir '/results/copyencoding_utf8.csv'
+
+CREATE TABLE copy_encoding_tab (t text);
+
+-- Valid cases
+
+-- Use ENCODING option
+-- U+3042 HIRAGANA LETTER A
+COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
+-- Read UTF8 data as LATIN1: no error
+COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'LATIN1');
+
+-- Use client_encoding
+SET client_encoding TO UTF8;
+-- U+3042 HIRAGANA LETTER A
+COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
+-- Read UTF8 data as LATIN1: no error
+SET client_encoding TO LATIN1;
+COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
+RESET client_encoding;
+
+-- Invalid cases
+
+-- Use ENCODING explicitly
+-- U+3042 HIRAGANA LETTER A
+COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
+-- Read UTF8 data as EUC_JP: error, invalid byte sequence
+COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'EUC_JP');
+
+-- Use client_encoding
+SET client_encoding TO UTF8;
+-- U+3042 HIRAGANA LETTER A
+COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
+-- Read UTF8 data as EUC_JP: error, invalid byte sequence
+SET client_encoding TO EUC_JP;
+COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
+RESET client_encoding;
+
+DROP TABLE copy_encoding_tab;