diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 224d4fe5a9f9..8b5c55dc6bc2 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -14633,6 +14633,56 @@ SELECT xmltext('< foo & bar >'); + + <literal>xmlcanonicalize</literal> + + + xmlcanonicalize + + + +xmlcanonicalize ( doc xml [, keep_comments boolean DEFAULT true] ) xml + + + + + This function transforms a given XML document into its canonical form, + as defined by the W3C Canonical XML 1.1 Specification, which standardizes the document's + structure and syntax to facilitate comparison and validation. + The keep_comments parameter controls whether XML comments from the input document are preserved or discarded. + If omitted, it defaults to true. + + + + Example: + + + 42 + + '::xml); + xmlcanonicalize +----------------------------------------------------------------------------- + 42 +(1 row) + +SELECT + xmlcanonicalize( + ' + + 42 + + '::xml, false); + xmlcanonicalize +----------------------------------------------------------- + 42 +(1 row) +]]> + + + <literal>xmlcomment</literal> diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql index 566f308e4439..15c33335dc7b 100644 --- a/src/backend/catalog/system_functions.sql +++ b/src/backend/catalog/system_functions.sql @@ -268,6 +268,12 @@ CREATE OR REPLACE FUNCTION xpath_exists(text, xml) IMMUTABLE PARALLEL SAFE STRICT COST 1 RETURN xpath_exists($1, $2, '{}'::text[]); +CREATE OR REPLACE FUNCTION xmlcanonicalize(xml, boolean DEFAULT true) + RETURNS xml + LANGUAGE internal + IMMUTABLE PARALLEL SAFE STRICT +AS 'xmlcanonicalize'; + CREATE OR REPLACE FUNCTION pg_sleep_for(interval) RETURNS void LANGUAGE sql diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index a4150bff2eae..094eb4c33eff 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -58,6 +58,7 @@ #include #include #include +#include /* * We used to check for xmlStructuredErrorContext via a 
configure test; but
@@ -544,6 +545,79 @@ xmltext(PG_FUNCTION_ARGS)
 #endif							/* not USE_LIBXML */
 }
 
+/*
+ * Canonicalizes the given XML document according to the W3C Canonical XML 1.1
+ * specification, using libxml2's xmlC14NDocDumpMemory().
+ *
+ * The input XML must be a well-formed document (not a fragment). The
+ * canonical form is deterministic and useful for digital signatures and
+ * comparing logically equivalent XML.
+ *
+ * The second argument determines whether comments are preserved
+ * (true) or omitted (false) in the canonicalized output.
+ */
+Datum
+xmlcanonicalize(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	xmltype    *arg = PG_GETARG_XML_P(0);
+	bool		keep_comments = PG_GETARG_BOOL(1);
+	text	   *result;
+	int			nbytes;
+	xmlDocPtr	doc;
+	xmlChar    *xmlbuf = NULL;
+
+	/* Parse the input xmltype into a full XML document */
+	doc = xml_parse(arg, XMLOPTION_DOCUMENT, false,
+					GetDatabaseEncoding(), NULL, NULL, NULL);
+
+	/*
+	 * Canonicalize the XML document into memory using Canonical XML 1.1.
+	 *
+	 * xmlC14NDocDumpMemory arguments:
+	 * - doc: the XML document to canonicalize (already parsed above)
+	 * - nodes: NULL means the entire document is canonicalized
+	 * - mode: XML_C14N_1_1 selects the Canonical XML 1.1 algorithm
+	 *   (xmlC14NMode enum)
+	 * - inclusive_ns_prefixes: NULL includes all namespaces by default
+	 * - with_comments: determined by keep_comments argument
+	 * - doc_txt_ptr: output buffer receiving the canonicalized XML (xmlbuf)
+	 *
+	 * On success, xmlbuf points to the serialized canonical form,
+	 * and nbytes holds its size.
+	 */
+	nbytes = xmlC14NDocDumpMemory(doc, NULL, XML_C14N_1_1, NULL,
+								  keep_comments ? 1 : 0, &xmlbuf);
+
+	/* The parsed tree is not used again after serialization */
+	if (doc)
+		xmlFreeDoc(doc);
+
+	if (nbytes < 0 || xmlbuf == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("could not canonicalize XML document")));
+
+	/*
+	 * xmlbuf is released with xmlFree(), so make sure that happens even if
+	 * copying it into a text datum raises an error.
+	 */
+	PG_TRY();
+	{
+		result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+	}
+	PG_FINALLY();
+	{
+		xmlFree(xmlbuf);
+	}
+	PG_END_TRY();
+
+	PG_RETURN_XML_P(result);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif							/* not USE_LIBXML */
+}
 
 /*
  * TODO: xmlconcat needs to merge the notations and unparsed entities
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d3d28a263fa9..d6c5b80a8f39 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9133,6 +9133,9 @@
 { oid => '3813', descr => 'generate XML text node',
   proname => 'xmltext', prorettype => 'xml', proargtypes => 'text',
   prosrc => 'xmltext' },
+{ oid => '3814', descr => 'generate the canonical form of an XML document',
+  proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool',
+  prosrc => 'xmlcanonicalize' },
 
 { oid => '2923', descr => 'map table contents to XML',
   proname => 'table_to_xml', procost => '100', provolatile => 's',
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 103a22a3b1d3..688c0fc3e9de 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -1881,3 +1881,85 @@ SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); x<P>73</P>0.42truej (1 row) +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + 421 321 value>"0" && value<"10" ?"valid":"error"+ + +(1 row) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 421 321 value>"0" && value<"10" ?"valid":"error" +(1 row) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? 
+---------- + t +(1 row) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- + +(1 row) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(' ', true); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(''); +ERROR: invalid XML document +SELECT xmlcanonicalize(' '); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo'); +ERROR: invalid XML document +\set VERBOSITY default diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index 73c411118a39..8bc3ac1c9668 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -1496,3 +1496,83 @@ ERROR: unsupported XML feature LINE 1: SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j':... ^ DETAIL: This functionality requires the server to be built with libxml support. +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +ERROR: unsupported XML feature +LINE 2: (' + ^ +DETAIL: This functionality requires the server to be built with libxml support. +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? +---------- +(0 rows) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(' ', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize('foo', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(''); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(' '); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize('foo'); +ERROR: unsupported XML feature at character 24 +\set VERBOSITY default diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out index a85d95358d90..4ce36ff82571 100644 --- a/src/test/regress/expected/xml_2.out +++ b/src/test/regress/expected/xml_2.out @@ -1867,3 +1867,85 @@ SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); x<P>73</P>0.42truej (1 row) +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + 421 321 value>"0" && value<"10" ?"valid":"error"+ + +(1 row) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 421 321 value>"0" && value<"10" ?"valid":"error" +(1 row) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? 
+---------- + t +(1 row) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- + +(1 row) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(' ', true); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(''); +ERROR: invalid XML document +SELECT xmlcanonicalize(' '); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo'); +ERROR: invalid XML document +\set VERBOSITY default diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 0ea4f508837c..4af51a9908f8 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -679,3 +679,55 @@ SELECT xmltext(' '); SELECT xmltext('foo `$_-+?=*^%!|/\()[]{}'); SELECT xmltext('foo & <"bar">'); SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); + +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); + +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(NULL, true); + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +SELECT xmlcanonicalize(' ', true); +SELECT xmlcanonicalize('foo', true); +SELECT xmlcanonicalize(''); +SELECT xmlcanonicalize(' '); +SELECT xmlcanonicalize('foo'); +\set VERBOSITY default \ No newline at end of file