From 92ce38795f010c9a254a9e180ae2e45763388fd5 Mon Sep 17 00:00:00 2001 From: Jim Jones Date: Tue, 10 Jun 2025 10:46:24 +0200 Subject: [PATCH] Add xmlcanonicalize function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the xmlcanonicalize function, which transforms an XML document into its canonical form according to the W3C C14N 1.1 specification. xmlcanonicalize(doc xml, keep_comments boolean DEFAULT true) -> xml * doc: The XML document to be canonicalized. * keep_comments: A flag indicating whether to preserve or discard XML comments from the input document. If omitted, it defaults to 'true'. This function is implemented using the xmlC14NDocDumpMemory() function from libxml2’s Canonical XML (C14N) module. --- doc/src/sgml/func.sgml | 50 +++++++++++++++ src/backend/catalog/system_functions.sql | 6 ++ src/backend/utils/adt/xml.c | 60 +++++++++++++++++ src/include/catalog/pg_proc.dat | 3 + src/test/regress/expected/xml.out | 82 ++++++++++++++++++++++++ src/test/regress/expected/xml_1.out | 80 +++++++++++++++++++++++ src/test/regress/expected/xml_2.out | 82 ++++++++++++++++++++++++ src/test/regress/sql/xml.sql | 52 +++++++++++++++ 8 files changed, 415 insertions(+) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 224d4fe5a9f9..8b5c55dc6bc2 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -14633,6 +14633,56 @@ SELECT xmltext('< foo & bar >'); + + <literal>xmlcanonicalize</literal> + + + xmlcanonicalize + + + +xmlcanonicalize ( doc xml [, keep_comments boolean DEFAULT true] ) xml + + + + + This function transforms a given XML document into its canonical form, + as defined by the W3C Canonical XML 1.1 Specification, which standardizes the document's + structure and syntax to facilitate comparison and validation. + The keep_comments parameter controls whether XML comments from the input document are preserved or discarded. + If omitted, it defaults to true. 
+ + + + Example: + + + 42 + + '::xml); + xmlcanonicalize +----------------------------------------------------------------------------- + 42 +(1 row) + +SELECT + xmlcanonicalize( + ' + + 42 + + '::xml, false); + xmlcanonicalize +----------------------------------------------------------- + 42 +(1 row) +]]> + + + <literal>xmlcomment</literal> diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql index 566f308e4439..15c33335dc7b 100644 --- a/src/backend/catalog/system_functions.sql +++ b/src/backend/catalog/system_functions.sql @@ -268,6 +268,12 @@ CREATE OR REPLACE FUNCTION xpath_exists(text, xml) IMMUTABLE PARALLEL SAFE STRICT COST 1 RETURN xpath_exists($1, $2, '{}'::text[]); +CREATE OR REPLACE FUNCTION xmlcanonicalize(doc xml, keep_comments boolean DEFAULT true) + RETURNS xml + LANGUAGE internal + IMMUTABLE PARALLEL SAFE STRICT +AS 'xmlcanonicalize'; + CREATE OR REPLACE FUNCTION pg_sleep_for(interval) RETURNS void LANGUAGE sql diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index a4150bff2eae..094eb4c33eff 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -58,6 +58,7 @@ #include #include #include +#include /* * We used to check for xmlStructuredErrorContext via a configure test; but @@ -544,6 +545,65 @@ xmltext(PG_FUNCTION_ARGS) #endif /* not USE_LIBXML */ } +/* + * Canonicalizes the given XML document according to the W3C Canonical XML 1.1 + * specification, using libxml2's xmlC14NDocDumpMemory(). + * + * The input XML must be a well-formed document (not a fragment). The + * canonical form is deterministic and useful for digital signatures and + * comparing logically equivalent XML. + * + * The second argument determines whether comments are preserved + * (true) or omitted (false) in the canonicalized output. 
+ */ +Datum xmlcanonicalize(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + xmltype *arg = PG_GETARG_XML_P(0); + bool keep_comments = PG_GETARG_BOOL(1); + text *result; + int nbytes; + xmlDocPtr doc; + xmlChar *xmlbuf = NULL; + + /* Parse the input xmltype into a full XML document */ + doc = xml_parse(arg, XMLOPTION_DOCUMENT, false, + GetDatabaseEncoding(), NULL, NULL, NULL); + + /* + * Canonicalize the XML document into memory using Canonical XML 1.1. + * + * xmlC14NDocDumpMemory arguments: + * - doc: the XML document to canonicalize (already parsed above) + * - nodes: NULL means the entire document is canonicalized + * - mode: XML_C14N_1_1 (xmlC14NMode) selects the Canonical XML 1.1 algorithm + * - inclusive_ns_prefixes: NULL includes all namespaces by default + * - with_comments: determined by keep_comments argument + * - doc_txt_ptr: output buffer receiving the canonicalized XML (xmlbuf) + * + * On success, xmlbuf points to the serialized canonical form, + * and nbytes holds its size. + */ + nbytes = xmlC14NDocDumpMemory(doc, NULL, XML_C14N_1_1, NULL, keep_comments, &xmlbuf); + + if (doc) + xmlFreeDoc(doc); + + if (nbytes < 0 || xmlbuf == NULL) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not canonicalize XML document"))); + + result = cstring_to_text_with_len((const char *) xmlbuf, nbytes); + + xmlFree(xmlbuf); + + PG_RETURN_XML_P(result); +#else + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} /* * TODO: xmlconcat needs to merge the notations and unparsed entities diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index d3d28a263fa9..d6c5b80a8f39 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -9133,6 +9133,9 @@ { oid => '3813', descr => 'generate XML text node', proname => 'xmltext', prorettype => 'xml', proargtypes => 'text', prosrc => 'xmltext' }, +{ oid => '3814', descr => 'generate the canonical form of an XML document', + proname => 'xmlcanonicalize', prorettype => 'xml', 
proargtypes => 'xml bool', + prosrc => 'xmlcanonicalize' }, { oid => '2923', descr => 'map table contents to XML', proname => 'table_to_xml', procost => '100', provolatile => 's', diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index 103a22a3b1d3..688c0fc3e9de 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -1881,3 +1881,85 @@ SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); x<P>73</P>0.42truej (1 row) +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + 421 321 value>"0" && value<"10" ?"valid":"error"+ + +(1 row) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 421 321 value>"0" && value<"10" ?"valid":"error" +(1 row) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? 
+---------- + t +(1 row) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- + +(1 row) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(' ', true); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(''); +ERROR: invalid XML document +SELECT xmlcanonicalize(' '); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo'); +ERROR: invalid XML document +\set VERBOSITY default diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index 73c411118a39..8bc3ac1c9668 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -1496,3 +1496,83 @@ ERROR: unsupported XML feature LINE 1: SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j':... ^ DETAIL: This functionality requires the server to be built with libxml support. +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +ERROR: unsupported XML feature +LINE 2: (' + ^ +DETAIL: This functionality requires the server to be built with libxml support. +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? +---------- +(0 rows) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(' ', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize('foo', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(''); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(' '); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize('foo'); +ERROR: unsupported XML feature at character 24 +\set VERBOSITY default diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out index a85d95358d90..4ce36ff82571 100644 --- a/src/test/regress/expected/xml_2.out +++ b/src/test/regress/expected/xml_2.out @@ -1867,3 +1867,85 @@ SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); x<P>73</P>0.42truej (1 row) +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + 421 321 value>"0" && value<"10" ?"valid":"error"+ + +(1 row) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 421 321 value>"0" && value<"10" ?"valid":"error" +(1 row) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? 
+---------- + t +(1 row) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- + +(1 row) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(' ', true); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(''); +ERROR: invalid XML document +SELECT xmlcanonicalize(' '); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo'); +ERROR: invalid XML document +\set VERBOSITY default diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 0ea4f508837c..4af51a9908f8 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -679,3 +679,55 @@ SELECT xmltext(' '); SELECT xmltext('foo `$_-+?=*^%!|/\()[]{}'); SELECT xmltext('foo & <"bar">'); SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); + +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); + +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(NULL, true); + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +SELECT xmlcanonicalize(' ', true); +SELECT xmlcanonicalize('foo', true); +SELECT xmlcanonicalize(''); +SELECT xmlcanonicalize(' '); +SELECT xmlcanonicalize('foo'); +\set VERBOSITY default \ No newline at end of file