Skip to content

Fix bug #79701: getElementById does not correctly work with duplicate definitions #14349

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions ext/dom/attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include "php_dom.h"
#include "dom_properties.h"
#include "internal_helpers.h"

/*
* class DOMAttr extends DOMNode
Expand Down Expand Up @@ -106,6 +107,16 @@ zend_result dom_attr_specified_read(dom_object *obj, zval *retval)

/* }}} */

void dom_attr_value_will_change(dom_object *obj, xmlAttrPtr attrp)
{
if (attrp->atype == XML_ATTRIBUTE_ID) {
xmlRemoveID(attrp->doc, attrp);
attrp->atype = XML_ATTRIBUTE_ID;
}

dom_mark_ids_modified(obj->document);
}

/* {{{ value string
readonly=no
URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#ID-221662474
Expand All @@ -122,6 +133,8 @@ zend_result dom_attr_value_write(dom_object *obj, zval *newval)
{
DOM_PROP_NODE(xmlAttrPtr, attrp, obj);

dom_attr_value_will_change(obj, attrp);

/* Typed property, this is already a string */
ZEND_ASSERT(Z_TYPE_P(newval) == IS_STRING);
zend_string *str = Z_STR_P(newval);
Expand Down
51 changes: 32 additions & 19 deletions ext/dom/document.c
Original file line number Diff line number Diff line change
Expand Up @@ -1028,34 +1028,47 @@ Since: DOM Level 2
*/
PHP_METHOD(DOMDocument, getElementById)
{
zval *id;
xmlDocPtr docp;
xmlAttrPtr attrp;
size_t idname_len;
dom_object *intern;
char *idname;

id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &idname, &idname_len) == FAILURE) {
RETURN_THROWS();
}

DOM_GET_OBJ(docp, id, xmlDocPtr, intern);
ZEND_PARSE_PARAMETERS_START(1, 1)
Z_PARAM_STRING(idname, idname_len)
ZEND_PARSE_PARAMETERS_END();

attrp = xmlGetID(docp, BAD_CAST idname);
DOM_GET_OBJ(docp, ZEND_THIS, xmlDocPtr, intern);

/* From the moment an ID is created, libxml2's behaviour is to cache that element, even
* if that element is not yet attached to the document. Similarly, only upon destruction of
* the element the ID is actually removed by libxml2. Since libxml2 has such behaviour deeply
* ingrained in the library, and uses the cache for various purposes, it seems like a bad
* idea and lost cause to fight it. Instead, we'll simply walk the tree upwards to check
* if the node is attached to the document. */
if (attrp && attrp->parent && php_dom_is_node_connected(attrp->parent)) {
DOM_RET_OBJ((xmlNodePtr) attrp->parent, intern);
/* If the document has not been manipulated yet, the ID cache will be in sync and we can trust its result.
* This check mainly exists because a lot of times people just query a document without modifying it,
* and we can allow quick access to IDs in that case. */
if (!dom_is_document_cache_modified_since_parsing(intern->document)) {
const xmlAttr *attrp = xmlGetID(docp, BAD_CAST idname);
if (attrp && attrp->parent) {
DOM_RET_OBJ(attrp->parent, intern);
}
} else {
RETVAL_NULL();
}
/* From the moment an ID is created, libxml2's behaviour is to cache that element, even
* if that element is not yet attached to the document. Similarly, only upon destruction of
* the element the ID is actually removed by libxml2. Since libxml2 has such behaviour deeply
* ingrained in the library, and uses the cache for various purposes, it seems like a bad
* idea and lost cause to fight it. */

const xmlNode *base = (const xmlNode *) docp;
const xmlNode *node = base->children;
while (node != NULL) {
if (node->type == XML_ELEMENT_NODE) {
for (const xmlAttr *attr = node->properties; attr != NULL; attr = attr->next) {
if (attr->atype == XML_ATTRIBUTE_ID && dom_compare_value(attr, BAD_CAST idname)) {
DOM_RET_OBJ((xmlNodePtr) node, intern);
return;
}
}
}

node = php_dom_next_in_tree_order(node, base);
}
}
}
/* }}} end dom_document_get_element_by_id */

Expand Down
62 changes: 40 additions & 22 deletions ext/dom/element.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,15 +183,15 @@ zend_result dom_element_id_read(dom_object *obj, zval *retval)
return dom_element_reflected_attribute_read(obj, retval, "id");
}

static void php_set_attribute_id(xmlAttrPtr attrp, bool is_id);
static void php_set_attribute_id(xmlAttrPtr attrp, bool is_id, php_libxml_ref_obj *document);

zend_result dom_element_id_write(dom_object *obj, zval *newval)
{
xmlAttrPtr attr = dom_element_reflected_attribute_write(obj, newval, "id");
if (!attr) {
return FAILURE;
}
php_set_attribute_id(attr, true);
php_set_attribute_id(attr, true, obj->document);
return SUCCESS;
}
/* }}} */
Expand Down Expand Up @@ -352,6 +352,16 @@ static xmlNodePtr dom_create_attribute(xmlNodePtr nodep, const char *name, const
}
}

static void dom_check_register_attribute_id(xmlAttrPtr attr, php_libxml_ref_obj *document)
{
dom_mark_ids_modified(document);

if (attr->atype != XML_ATTRIBUTE_ID && attr->doc->type == XML_HTML_DOCUMENT_NODE && attr->ns == NULL && xmlStrEqual(attr->name, BAD_CAST "id")) {
/* To respect XML's ID behaviour, we only do this registration for HTML documents. */
attr->atype = XML_ATTRIBUTE_ID;
}
}

/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-F68F082
Modern spec URL: https://dom.spec.whatwg.org/#dom-element-setattribute
Since:
Expand All @@ -360,7 +370,6 @@ PHP_METHOD(DOMElement, setAttribute)
{
zval *id;
xmlNode *nodep;
xmlNodePtr attr = NULL;
int name_valid;
size_t name_len, value_len;
dom_object *intern;
Expand Down Expand Up @@ -394,23 +403,28 @@ PHP_METHOD(DOMElement, setAttribute)
}

/* Can't use xmlSetNsProp unconditionally here because that doesn't take into account the qualified name matching... */
attr = (xmlNodePtr) php_dom_get_attribute_node(nodep, BAD_CAST name, name_len);
xmlAttrPtr attr = php_dom_get_attribute_node(nodep, BAD_CAST name, name_len);
if (attr != NULL) {
dom_remove_all_children(attr);
dom_attr_value_will_change(intern, attr);
dom_remove_all_children((xmlNodePtr) attr);
xmlNodePtr node = xmlNewDocText(attr->doc, BAD_CAST value);
xmlAddChild(attr, node);
xmlAddChild((xmlNodePtr) attr, node);
} else {
attr = (xmlNodePtr) xmlSetNsProp(nodep, NULL, name_processed, BAD_CAST value);
attr = xmlSetNsProp(nodep, NULL, name_processed, BAD_CAST value);
if (EXPECTED(attr != NULL)) {
dom_check_register_attribute_id(attr, intern->document);
}
}

if (name_processed != BAD_CAST name) {
efree(name_processed);
}
} else {
attr = dom_get_attribute_or_nsdecl(intern, nodep, BAD_CAST name, name_len);
xmlNodePtr attr = dom_get_attribute_or_nsdecl(intern, nodep, BAD_CAST name, name_len);
if (attr != NULL) {
switch (attr->type) {
case XML_ATTRIBUTE_NODE:
dom_attr_value_will_change(intern, (xmlAttrPtr) attr);
node_list_unlink(attr->children);
break;
case XML_NAMESPACE_DECL:
Expand Down Expand Up @@ -693,7 +707,10 @@ static void dom_element_set_attribute_node_common(INTERNAL_FUNCTION_PARAMETERS,

xmlAddChild(nodep, (xmlNodePtr) attrp);
if (!modern) {
dom_mark_ids_modified(intern->document);
php_dom_reconcile_attribute_namespace_after_insertion(attrp);
} else {
dom_check_register_attribute_id(attrp, intern->document);
}

/* Returns old property if removed otherwise NULL */
Expand Down Expand Up @@ -870,6 +887,8 @@ static void dom_set_attribute_ns_legacy(dom_object *intern, xmlNodePtr elemp, ch
int errorcode = dom_check_qname(name, &localname, &prefix, uri_len, name_len);

if (errorcode == 0) {
dom_mark_ids_modified(intern->document);

if (uri_len > 0) {
nodep = (xmlNodePtr) xmlHasNsProp(elemp, BAD_CAST localname, BAD_CAST uri);
if (nodep != NULL && nodep->type != XML_ATTRIBUTE_DECL) {
Expand Down Expand Up @@ -958,8 +977,11 @@ static void dom_set_attribute_ns_modern(dom_object *intern, xmlNodePtr elemp, ze
if (errorcode == 0) {
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(intern);
xmlNsPtr ns = php_dom_libxml_ns_mapper_get_ns_raw_prefix_string(ns_mapper, prefix, xmlStrlen(prefix), uri);
if (UNEXPECTED(xmlSetNsProp(elemp, ns, localname, BAD_CAST value) == NULL)) {
xmlAttrPtr attr = xmlSetNsProp(elemp, ns, localname, BAD_CAST value);
if (UNEXPECTED(attr == NULL)) {
php_dom_throw_error(INVALID_STATE_ERR, /* strict */ true);
} else {
dom_check_register_attribute_id(attr, intern->document);
}
} else {
php_dom_throw_error(errorcode, /* strict */ true);
Expand Down Expand Up @@ -1270,20 +1292,16 @@ PHP_METHOD(DOMElement, hasAttributeNS)
}
/* }}} end dom_element_has_attribute_ns */

static void php_set_attribute_id(xmlAttrPtr attrp, bool is_id) /* {{{ */
static void php_set_attribute_id(xmlAttrPtr attrp, bool is_id, php_libxml_ref_obj *document) /* {{{ */
{
if (is_id == 1 && attrp->atype != XML_ATTRIBUTE_ID) {
xmlChar *id_val;

id_val = xmlNodeListGetString(attrp->doc, attrp->children, 1);
if (id_val != NULL) {
xmlAddID(NULL, attrp->doc, id_val, attrp);
xmlFree(id_val);
}
} else if (is_id == 0 && attrp->atype == XML_ATTRIBUTE_ID) {
if (is_id && attrp->atype != XML_ATTRIBUTE_ID) {
attrp->atype = XML_ATTRIBUTE_ID;
} else if (!is_id && attrp->atype == XML_ATTRIBUTE_ID) {
xmlRemoveID(attrp->doc, attrp);
attrp->atype = 0;
}

dom_mark_ids_modified(document);
}
/* }}} */

Expand Down Expand Up @@ -1311,7 +1329,7 @@ PHP_METHOD(DOMElement, setIdAttribute)
if (attrp == NULL || attrp->type == XML_ATTRIBUTE_DECL) {
php_dom_throw_error(NOT_FOUND_ERR, dom_get_strict_error(intern->document));
} else {
php_set_attribute_id(attrp, is_id);
php_set_attribute_id(attrp, is_id, intern->document);
}
}
/* }}} end dom_element_set_id_attribute */
Expand Down Expand Up @@ -1340,7 +1358,7 @@ PHP_METHOD(DOMElement, setIdAttributeNS)
if (attrp == NULL || attrp->type == XML_ATTRIBUTE_DECL) {
php_dom_throw_error(NOT_FOUND_ERR, dom_get_strict_error(intern->document));
} else {
php_set_attribute_id(attrp, is_id);
php_set_attribute_id(attrp, is_id, intern->document);
}
}
/* }}} end dom_element_set_id_attribute_ns */
Expand All @@ -1367,7 +1385,7 @@ static void dom_element_set_id_attribute_node(INTERNAL_FUNCTION_PARAMETERS, zend
if (attrp->parent != nodep) {
php_dom_throw_error(NOT_FOUND_ERR, dom_get_strict_error(intern->document));
} else {
php_set_attribute_id(attrp, is_id);
php_set_attribute_id(attrp, is_id, intern->document);
}
}

Expand Down
27 changes: 27 additions & 0 deletions ext/dom/internal_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,31 @@ DOM_DEF_GET_CE_FUNC(domimplementation)

#endif

static zend_always_inline size_t dom_minimum_modification_nr_since_parsing(php_libxml_ref_obj *doc_ptr)
{
/* For old-style DOM, we need a "new DOMDocument" + a load, so the minimum modification nr is 2.
* For new-style DOM, we need only to call a named constructor, so the minimum modification nr is 1. */
return doc_ptr->class_type == PHP_LIBXML_CLASS_MODERN ? 1 : 2;
}

static zend_always_inline void dom_mark_document_cache_as_modified_since_parsing(php_libxml_ref_obj *doc_ptr)
{
if (doc_ptr) {
doc_ptr->cache_tag.modification_nr = MAX(dom_minimum_modification_nr_since_parsing(doc_ptr) + 1,
doc_ptr->cache_tag.modification_nr);
}
}

/* Marks the ID cache as potentially stale */
static zend_always_inline void dom_mark_ids_modified(php_libxml_ref_obj *doc_ptr)
{
/* Although this is currently a wrapper function, it's best to abstract the actual implementation away. */
dom_mark_document_cache_as_modified_since_parsing(doc_ptr);
}

static zend_always_inline bool dom_is_document_cache_modified_since_parsing(php_libxml_ref_obj *doc_ptr)
{
return !doc_ptr || doc_ptr->cache_tag.modification_nr > dom_minimum_modification_nr_since_parsing(doc_ptr);
}

#endif
1 change: 1 addition & 0 deletions ext/dom/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ zend_result dom_node_node_value_write(dom_object *obj, zval *newval)
/* Access to Element node is implemented as a convenience method */
switch (nodep->type) {
case XML_ATTRIBUTE_NODE:
dom_attr_value_will_change(obj, (xmlAttrPtr) nodep);
if (php_dom_follow_spec_intern(obj)) {
dom_remove_all_children(nodep);
xmlAddChild(nodep, xmlNewTextLen(BAD_CAST ZSTR_VAL(str), ZSTR_LEN(str)));
Expand Down
1 change: 1 addition & 0 deletions ext/dom/php_dom.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ xmlDocPtr php_dom_create_html_doc(void);
xmlEntityPtr dom_entity_reference_fetch_and_sync_declaration(xmlNodePtr reference);
void dom_set_xml_class(php_libxml_ref_obj *document);
bool dom_compare_value(const xmlAttr *attr, const xmlChar *value);
void dom_attr_value_will_change(dom_object *obj, xmlAttrPtr attrp);

typedef enum {
DOM_LOAD_STRING = 0,
Expand Down
53 changes: 53 additions & 0 deletions ext/dom/tests/bug79701/id_property.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
--TEST--
Bug #79701 (getElementById does not correctly work with duplicate definitions) - id property variation
--EXTENSIONS--
dom
--FILE--
<?php
$dom = Dom\XMLDocument::createFromString(<<<XML
<root>
<test1/>
<test2/>
</root>
XML);

$test1 = $dom->documentElement->firstElementChild;
$test2 = $test1->nextElementSibling;

echo "--- After parsing ---\n";
var_dump($dom->getElementById("x")?->nodeName);

echo "--- After setting test2 ---\n";
$test2->id = "x";
var_dump($dom->getElementById("x")?->nodeName);
echo "--- After setting test1 ---\n";
$test1->id = "x";
var_dump($dom->getElementById("x")?->nodeName);
echo "--- After resetting test1 ---\n";
$test1->id = "y";
var_dump($dom->getElementById("x")?->nodeName);
echo "--- After resetting test2 ---\n";
$test2->id = "y";
var_dump($dom->getElementById("x")?->nodeName);
echo "--- After resetting test1 ---\n";
$test1->id = "x";
var_dump($dom->getElementById("x")?->nodeName);
echo "--- After calling setIdAttribute with false on test1 ---\n";
$test1->setIdAttribute("id", false);
var_dump($dom->getElementById("x")?->nodeName);
?>
--EXPECT--
--- After parsing ---
NULL
--- After setting test2 ---
string(5) "test2"
--- After setting test1 ---
string(5) "test1"
--- After resetting test1 ---
string(5) "test2"
--- After resetting test2 ---
NULL
--- After resetting test1 ---
string(5) "test1"
--- After calling setIdAttribute with false on test1 ---
NULL
18 changes: 18 additions & 0 deletions ext/dom/tests/bug79701/node.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
--TEST--
Bug #79701 (getElementById does not correctly work with duplicate definitions) - attribute node variation
--EXTENSIONS--
dom
--FILE--
<?php
$dom = Dom\HTMLDocument::createEmpty();
$element = $dom->createElement('foo');
$dom->append($element);
$attr = $dom->createAttribute('id');
$attr->value = 'test';
var_dump($dom->getElementById('test')?->nodeName);
$element->setAttributeNode($attr);
var_dump($dom->getElementById('test')?->nodeName);
?>
--EXPECT--
NULL
string(3) "FOO"
Loading
Loading