Skip to content

#53655: Improve speed of DOMNode::C14N() on large XML documents #12278

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions UPGRADING
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,6 @@ PHP 8.4 UPGRADE NOTES
14. Performance Improvements
========================================

* The performance of DOMNode::C14N() is greatly improved for the case without
an xpath query. This can easily give a time improvement of two orders of
magnitude for documents with tens of thousands of nodes.
44 changes: 26 additions & 18 deletions ext/dom/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -1736,6 +1736,25 @@ PHP_METHOD(DOMNode, lookupNamespaceURI)
}
/* }}} end dom_node_lookup_namespace_uri */

/* Node-visibility callback handed to xmlC14NExecute(): reports whether `node`
 * is the subtree root passed through `user_data`, or is reachable from it by
 * following parent links upwards starting at `parent`.
 * Returns 1 when the node belongs to that subtree, 0 otherwise. */
static int dom_canonicalize_node_parent_lookup_cb(void *user_data, xmlNodePtr node, xmlNodePtr parent)
{
	xmlNodePtr subtree_root = user_data;

	/* The node itself is tested separately, and the upward walk starts from
	 * the `parent` argument rather than node->parent: under libxml2's tree
	 * rules the two are not necessarily equal (namespace declarations, for
	 * example, live outside the tree). */
	if (node == subtree_root) {
		return 1;
	}

	for (xmlNodePtr ancestor = parent; ancestor != NULL; ancestor = ancestor->parent) {
		if (ancestor == subtree_root) {
			return 1;
		}
	}

	return 0;
}

static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
{
zval *id;
Expand Down Expand Up @@ -1777,24 +1796,10 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
RETURN_THROWS();
}

php_libxml_invalidate_node_list_cache_from_doc(docp);

bool simple_node_parent_lookup_callback = false;
if (xpath_array == NULL) {
if (nodep->type != XML_DOCUMENT_NODE) {
ctxp = xmlXPathNewContext(docp);
ctxp->node = nodep;
xpathobjp = xmlXPathEvalExpression((xmlChar *) "(.//. | .//@* | .//namespace::*)", ctxp);
ctxp->node = NULL;
if (xpathobjp && xpathobjp->type == XPATH_NODESET) {
nodeset = xpathobjp->nodesetval;
} else {
if (xpathobjp) {
xmlXPathFreeObject(xpathobjp);
}
xmlXPathFreeContext(ctxp);
zend_throw_error(NULL, "XPath query did not return a nodeset");
RETURN_THROWS();
}
simple_node_parent_lookup_callback = true;
}
} else {
/*xpath query from xpath_array */
Expand Down Expand Up @@ -1873,8 +1878,11 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
}

if (buf != NULL) {
ret = xmlC14NDocSaveTo(docp, nodeset, exclusive, inclusive_ns_prefixes,
with_comments, buf);
if (simple_node_parent_lookup_callback) {
ret = xmlC14NExecute(docp, dom_canonicalize_node_parent_lookup_cb, nodep, exclusive, inclusive_ns_prefixes, with_comments, buf);
} else {
ret = xmlC14NDocSaveTo(docp, nodeset, exclusive, inclusive_ns_prefixes, with_comments, buf);
}
}

if (inclusive_ns_prefixes != NULL) {
Expand Down
37 changes: 37 additions & 0 deletions ext/dom/tests/canonicalization_special_nodes.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
--TEST--
Test: Canonicalization with special nodes
--EXTENSIONS--
dom
--FILE--
<?php
// Fixture covering several node kinds in one document: a DOCTYPE, a CDATA
// section, a comment, a child element that redeclares the empty default
// namespace, and a processing instruction without data.
$xml = <<<EOXML
<?xml version="1.0"?>
<!DOCTYPE doc [
]>
<doc xmlns="">
<![CDATA[bar]]>
<!-- x -->
<temp xmlns=""/>
<?pi-no-data ?>
</doc>
EOXML;

// Canonicalize starting from the document element rather than the document node.
$dom = new DOMDocument();
$dom->loadXML($xml);
$doc = $dom->documentElement;
// No xpath query is passed. Output is produced twice: first with comments
// requested, then without. Nothing is printed between the two results, so they
// appear back to back in the expected output.
echo $doc->C14N(withComments: true);
echo $doc->C14N(withComments: false);

?>
--EXPECT--
<doc>
bar
<!-- x -->
<temp></temp>
<?pi-no-data?>
</doc><doc>
bar

<temp></temp>
<?pi-no-data?>
</doc>