Skip to content

Commit 671de4f

Browse files
committed
Fix bug #79701: getElementById does not correctly work with duplicate definitions
This is a long standing bug: IDs aren't properly tracked causing either outdated or plain incorrect results from getElementById. This PR implements a pragmatic solution in which we still try to use the ID lookup table to a degree, but only as a performance boost not as a "single source of truth". Full details are explained in the getElementById code.
1 parent a58c3a7 commit 671de4f

17 files changed

+667
-41
lines changed

ext/dom/attr.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
#include "php_dom.h"
2727
#include "dom_properties.h"
28+
#include "internal_helpers.h"
2829

2930
/*
3031
* class DOMAttr extends DOMNode
@@ -106,6 +107,16 @@ zend_result dom_attr_specified_read(dom_object *obj, zval *retval)
106107

107108
/* }}} */
108109

110+
void dom_attr_value_will_change(dom_object *obj, xmlAttrPtr attrp)
111+
{
112+
if (attrp->atype == XML_ATTRIBUTE_ID) {
113+
xmlRemoveID(attrp->doc, attrp);
114+
attrp->atype = XML_ATTRIBUTE_ID;
115+
}
116+
117+
dom_mark_ids_modified(obj->document);
118+
}
119+
109120
/* {{{ value string
110121
readonly=no
111122
URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#ID-221662474
@@ -122,6 +133,8 @@ zend_result dom_attr_value_write(dom_object *obj, zval *newval)
122133
{
123134
DOM_PROP_NODE(xmlAttrPtr, attrp, obj);
124135

136+
dom_attr_value_will_change(obj, attrp);
137+
125138
/* Typed property, this is already a string */
126139
ZEND_ASSERT(Z_TYPE_P(newval) == IS_STRING);
127140
zend_string *str = Z_STR_P(newval);

ext/dom/document.c

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,34 +1028,47 @@ Since: DOM Level 2
10281028
*/
10291029
PHP_METHOD(DOMDocument, getElementById)
10301030
{
1031-
zval *id;
10321031
xmlDocPtr docp;
1033-
xmlAttrPtr attrp;
10341032
size_t idname_len;
10351033
dom_object *intern;
10361034
char *idname;
10371035

1038-
id = ZEND_THIS;
1039-
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &idname, &idname_len) == FAILURE) {
1040-
RETURN_THROWS();
1041-
}
1042-
1043-
DOM_GET_OBJ(docp, id, xmlDocPtr, intern);
1036+
ZEND_PARSE_PARAMETERS_START(1, 1)
1037+
Z_PARAM_STRING(idname, idname_len)
1038+
ZEND_PARSE_PARAMETERS_END();
10441039

1045-
attrp = xmlGetID(docp, BAD_CAST idname);
1040+
DOM_GET_OBJ(docp, ZEND_THIS, xmlDocPtr, intern);
10461041

1047-
/* From the moment an ID is created, libxml2's behaviour is to cache that element, even
1048-
* if that element is not yet attached to the document. Similarly, only upon destruction of
1049-
* the element the ID is actually removed by libxml2. Since libxml2 has such behaviour deeply
1050-
* ingrained in the library, and uses the cache for various purposes, it seems like a bad
1051-
* idea and lost cause to fight it. Instead, we'll simply walk the tree upwards to check
1052-
* if the node is attached to the document. */
1053-
if (attrp && attrp->parent && php_dom_is_node_connected(attrp->parent)) {
1054-
DOM_RET_OBJ((xmlNodePtr) attrp->parent, intern);
1042+
/* If the document has not been manipulated yet, the ID cache will be in sync and we can trust its result.
1043+
* This check mainly exists because a lot of times people just query a document without modifying it,
1044+
* and we can allow quick access to IDs in that case. */
1045+
if (!dom_is_document_cache_modified_since_parsing(intern->document)) {
1046+
const xmlAttr *attrp = xmlGetID(docp, BAD_CAST idname);
1047+
if (attrp && attrp->parent) {
1048+
DOM_RET_OBJ(attrp->parent, intern);
1049+
}
10551050
} else {
1056-
RETVAL_NULL();
1057-
}
1051+
/* From the moment an ID is created, libxml2's behaviour is to cache that element, even
1052+
* if that element is not yet attached to the document. Similarly, only upon destruction of
1053+
* the element the ID is actually removed by libxml2. Since libxml2 has such behaviour deeply
1054+
* ingrained in the library, and uses the cache for various purposes, it seems like a bad
1055+
* idea and lost cause to fight it. */
1056+
1057+
const xmlNode *base = (const xmlNode *) docp;
1058+
const xmlNode *node = base->children;
1059+
while (node != NULL) {
1060+
if (node->type == XML_ELEMENT_NODE) {
1061+
for (const xmlAttr *attr = node->properties; attr != NULL; attr = attr->next) {
1062+
if (attr->atype == XML_ATTRIBUTE_ID && dom_compare_value(attr, BAD_CAST idname)) {
1063+
DOM_RET_OBJ((xmlNodePtr) node, intern);
1064+
return;
1065+
}
1066+
}
1067+
}
10581068

1069+
node = php_dom_next_in_tree_order(node, base);
1070+
}
1071+
}
10591072
}
10601073
/* }}} end dom_document_get_element_by_id */
10611074

ext/dom/element.c

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -183,15 +183,15 @@ zend_result dom_element_id_read(dom_object *obj, zval *retval)
183183
return dom_element_reflected_attribute_read(obj, retval, "id");
184184
}
185185

186-
static void php_set_attribute_id(xmlAttrPtr attrp, bool is_id);
186+
static void php_set_attribute_id(xmlAttrPtr attrp, bool is_id, php_libxml_ref_obj *document);
187187

188188
zend_result dom_element_id_write(dom_object *obj, zval *newval)
189189
{
190190
xmlAttrPtr attr = dom_element_reflected_attribute_write(obj, newval, "id");
191191
if (!attr) {
192192
return FAILURE;
193193
}
194-
php_set_attribute_id(attr, true);
194+
php_set_attribute_id(attr, true, obj->document);
195195
return SUCCESS;
196196
}
197197
/* }}} */
@@ -352,6 +352,16 @@ static xmlNodePtr dom_create_attribute(xmlNodePtr nodep, const char *name, const
352352
}
353353
}
354354

355+
static void dom_check_register_attribute_id(xmlAttrPtr attr, php_libxml_ref_obj *document)
356+
{
357+
dom_mark_ids_modified(document);
358+
359+
if (attr->atype != XML_ATTRIBUTE_ID && attr->doc->type == XML_HTML_DOCUMENT_NODE && attr->ns == NULL && xmlStrEqual(attr->name, BAD_CAST "id")) {
360+
/* To respect XML's ID behaviour, we only do this registration for HTML documents. */
361+
attr->atype = XML_ATTRIBUTE_ID;
362+
}
363+
}
364+
355365
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-F68F082
356366
Modern spec URL: https://dom.spec.whatwg.org/#dom-element-setattribute
357367
Since:
@@ -360,7 +370,6 @@ PHP_METHOD(DOMElement, setAttribute)
360370
{
361371
zval *id;
362372
xmlNode *nodep;
363-
xmlNodePtr attr = NULL;
364373
int name_valid;
365374
size_t name_len, value_len;
366375
dom_object *intern;
@@ -394,23 +403,26 @@ PHP_METHOD(DOMElement, setAttribute)
394403
}
395404

396405
/* Can't use xmlSetNsProp unconditionally here because that doesn't take into account the qualified name matching... */
397-
attr = (xmlNodePtr) php_dom_get_attribute_node(nodep, BAD_CAST name, name_len);
406+
xmlAttrPtr attr = php_dom_get_attribute_node(nodep, BAD_CAST name, name_len);
398407
if (attr != NULL) {
399-
dom_remove_all_children(attr);
408+
dom_attr_value_will_change(intern, attr);
409+
dom_remove_all_children((xmlNodePtr) attr);
400410
xmlNodePtr node = xmlNewDocText(attr->doc, BAD_CAST value);
401-
xmlAddChild(attr, node);
411+
xmlAddChild((xmlNodePtr) attr, node);
402412
} else {
403-
attr = (xmlNodePtr) xmlSetNsProp(nodep, NULL, name_processed, BAD_CAST value);
413+
attr = xmlSetNsProp(nodep, NULL, name_processed, BAD_CAST value);
414+
dom_check_register_attribute_id(attr, intern->document);
404415
}
405416

406417
if (name_processed != BAD_CAST name) {
407418
efree(name_processed);
408419
}
409420
} else {
410-
attr = dom_get_attribute_or_nsdecl(intern, nodep, BAD_CAST name, name_len);
421+
xmlNodePtr attr = dom_get_attribute_or_nsdecl(intern, nodep, BAD_CAST name, name_len);
411422
if (attr != NULL) {
412423
switch (attr->type) {
413424
case XML_ATTRIBUTE_NODE:
425+
dom_attr_value_will_change(intern, (xmlAttrPtr) attr);
414426
node_list_unlink(attr->children);
415427
break;
416428
case XML_NAMESPACE_DECL:
@@ -693,7 +705,10 @@ static void dom_element_set_attribute_node_common(INTERNAL_FUNCTION_PARAMETERS,
693705

694706
xmlAddChild(nodep, (xmlNodePtr) attrp);
695707
if (!modern) {
708+
dom_mark_ids_modified(intern->document);
696709
php_dom_reconcile_attribute_namespace_after_insertion(attrp);
710+
} else {
711+
dom_check_register_attribute_id(attrp, intern->document);
697712
}
698713

699714
/* Returns old property if removed otherwise NULL */
@@ -958,8 +973,11 @@ static void dom_set_attribute_ns_modern(dom_object *intern, xmlNodePtr elemp, ze
958973
if (errorcode == 0) {
959974
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(intern);
960975
xmlNsPtr ns = php_dom_libxml_ns_mapper_get_ns_raw_prefix_string(ns_mapper, prefix, xmlStrlen(prefix), uri);
961-
if (UNEXPECTED(xmlSetNsProp(elemp, ns, localname, BAD_CAST value) == NULL)) {
976+
xmlAttrPtr attr = xmlSetNsProp(elemp, ns, localname, BAD_CAST value);
977+
if (UNEXPECTED(attr == NULL)) {
962978
php_dom_throw_error(INVALID_STATE_ERR, /* strict */ true);
979+
} else {
980+
dom_check_register_attribute_id(attr, intern->document);
963981
}
964982
} else {
965983
php_dom_throw_error(errorcode, /* strict */ true);
@@ -993,6 +1011,7 @@ PHP_METHOD(DOMElement, setAttributeNS)
9931011
if (php_dom_follow_spec_intern(intern)) {
9941012
dom_set_attribute_ns_modern(intern, elemp, uri, name, value);
9951013
} else {
1014+
dom_mark_ids_modified(intern->document);
9961015
dom_set_attribute_ns_legacy(intern, elemp, uri ? ZSTR_VAL(uri) : NULL, uri ? ZSTR_LEN(uri) : 0, ZSTR_VAL(name), ZSTR_LEN(name), value);
9971016
}
9981017
}
@@ -1270,20 +1289,16 @@ PHP_METHOD(DOMElement, hasAttributeNS)
12701289
}
12711290
/* }}} end dom_element_has_attribute_ns */
12721291

1273-
static void php_set_attribute_id(xmlAttrPtr attrp, bool is_id) /* {{{ */
1292+
static void php_set_attribute_id(xmlAttrPtr attrp, bool is_id, php_libxml_ref_obj *document) /* {{{ */
12741293
{
1275-
if (is_id == 1 && attrp->atype != XML_ATTRIBUTE_ID) {
1276-
xmlChar *id_val;
1277-
1278-
id_val = xmlNodeListGetString(attrp->doc, attrp->children, 1);
1279-
if (id_val != NULL) {
1280-
xmlAddID(NULL, attrp->doc, id_val, attrp);
1281-
xmlFree(id_val);
1282-
}
1283-
} else if (is_id == 0 && attrp->atype == XML_ATTRIBUTE_ID) {
1294+
if (is_id && attrp->atype != XML_ATTRIBUTE_ID) {
1295+
attrp->atype = XML_ATTRIBUTE_ID;
1296+
} else if (!is_id && attrp->atype == XML_ATTRIBUTE_ID) {
12841297
xmlRemoveID(attrp->doc, attrp);
12851298
attrp->atype = 0;
12861299
}
1300+
1301+
dom_mark_ids_modified(document);
12871302
}
12881303
/* }}} */
12891304

@@ -1311,7 +1326,7 @@ PHP_METHOD(DOMElement, setIdAttribute)
13111326
if (attrp == NULL || attrp->type == XML_ATTRIBUTE_DECL) {
13121327
php_dom_throw_error(NOT_FOUND_ERR, dom_get_strict_error(intern->document));
13131328
} else {
1314-
php_set_attribute_id(attrp, is_id);
1329+
php_set_attribute_id(attrp, is_id, intern->document);
13151330
}
13161331
}
13171332
/* }}} end dom_element_set_id_attribute */
@@ -1340,7 +1355,7 @@ PHP_METHOD(DOMElement, setIdAttributeNS)
13401355
if (attrp == NULL || attrp->type == XML_ATTRIBUTE_DECL) {
13411356
php_dom_throw_error(NOT_FOUND_ERR, dom_get_strict_error(intern->document));
13421357
} else {
1343-
php_set_attribute_id(attrp, is_id);
1358+
php_set_attribute_id(attrp, is_id, intern->document);
13441359
}
13451360
}
13461361
/* }}} end dom_element_set_id_attribute_ns */
@@ -1367,7 +1382,7 @@ static void dom_element_set_id_attribute_node(INTERNAL_FUNCTION_PARAMETERS, zend
13671382
if (attrp->parent != nodep) {
13681383
php_dom_throw_error(NOT_FOUND_ERR, dom_get_strict_error(intern->document));
13691384
} else {
1370-
php_set_attribute_id(attrp, is_id);
1385+
php_set_attribute_id(attrp, is_id, intern->document);
13711386
}
13721387
}
13731388

ext/dom/internal_helpers.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,31 @@ DOM_DEF_GET_CE_FUNC(domimplementation)
6363

6464
#endif
6565

66+
static zend_always_inline size_t dom_minimum_modification_nr_since_parsing(php_libxml_ref_obj *doc_ptr)
67+
{
68+
/* For old-style DOM, we need a "new DOMDocument" + a load, so the minimum modification nr is 2.
69+
* For new-style DOM, we need only to call a named constructor, so the minimum modification nr is 1. */
70+
return doc_ptr->class_type == PHP_LIBXML_CLASS_MODERN ? 1 : 2;
71+
}
72+
73+
static zend_always_inline void dom_mark_document_cache_as_modified_since_parsing(php_libxml_ref_obj *doc_ptr)
74+
{
75+
if (doc_ptr) {
76+
doc_ptr->cache_tag.modification_nr = MAX(dom_minimum_modification_nr_since_parsing(doc_ptr) + 1,
77+
doc_ptr->cache_tag.modification_nr);
78+
}
79+
}
80+
81+
/* Marks the ID cache as potentially stale */
82+
static zend_always_inline void dom_mark_ids_modified(php_libxml_ref_obj *doc_ptr)
83+
{
84+
/* Although this is currently a wrapper function, it's best to abstract the actual implementation away. */
85+
dom_mark_document_cache_as_modified_since_parsing(doc_ptr);
86+
}
87+
88+
static zend_always_inline bool dom_is_document_cache_modified_since_parsing(php_libxml_ref_obj *doc_ptr)
89+
{
90+
return !doc_ptr || doc_ptr->cache_tag.modification_nr > dom_minimum_modification_nr_since_parsing(doc_ptr);
91+
}
92+
6693
#endif

ext/dom/node.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ zend_result dom_node_node_value_write(dom_object *obj, zval *newval)
185185
/* Access to Element node is implemented as a convenience method */
186186
switch (nodep->type) {
187187
case XML_ATTRIBUTE_NODE:
188+
dom_attr_value_will_change(obj, (xmlAttrPtr) nodep);
188189
if (php_dom_follow_spec_intern(obj)) {
189190
dom_remove_all_children(nodep);
190191
xmlAddChild(nodep, xmlNewTextLen(BAD_CAST ZSTR_VAL(str), ZSTR_LEN(str)));

ext/dom/php_dom.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ xmlDocPtr php_dom_create_html_doc(void);
174174
xmlEntityPtr dom_entity_reference_fetch_and_sync_declaration(xmlNodePtr reference);
175175
void dom_set_xml_class(php_libxml_ref_obj *document);
176176
bool dom_compare_value(const xmlAttr *attr, const xmlChar *value);
177+
void dom_attr_value_will_change(dom_object *obj, xmlAttrPtr attrp);
177178

178179
typedef enum {
179180
DOM_LOAD_STRING = 0,
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
--TEST--
2+
Bug #79701 (getElementById does not correctly work with duplicate definitions) - id property variation
3+
--EXTENSIONS--
4+
dom
5+
--FILE--
6+
<?php
7+
$dom = Dom\XMLDocument::createFromString(<<<XML
8+
<root>
9+
<test1/>
10+
<test2/>
11+
</root>
12+
XML);
13+
14+
$test1 = $dom->documentElement->firstElementChild;
15+
$test2 = $test1->nextElementSibling;
16+
17+
echo "--- After parsing ---\n";
18+
var_dump($dom->getElementById("x")?->nodeName);
19+
20+
echo "--- After setting test2 ---\n";
21+
$test2->id = "x";
22+
var_dump($dom->getElementById("x")?->nodeName);
23+
echo "--- After setting test1 ---\n";
24+
$test1->id = "x";
25+
var_dump($dom->getElementById("x")?->nodeName);
26+
echo "--- After resetting test1 ---\n";
27+
$test1->id = "y";
28+
var_dump($dom->getElementById("x")?->nodeName);
29+
echo "--- After resetting test2 ---\n";
30+
$test2->id = "y";
31+
var_dump($dom->getElementById("x")?->nodeName);
32+
echo "--- After resetting test1 ---\n";
33+
$test1->id = "x";
34+
var_dump($dom->getElementById("x")?->nodeName);
35+
echo "--- After calling setIdAttribute with false on test1 ---\n";
36+
$test1->setIdAttribute("id", false);
37+
var_dump($dom->getElementById("x")?->nodeName);
38+
?>
39+
--EXPECT--
40+
--- After parsing ---
41+
NULL
42+
--- After setting test2 ---
43+
string(5) "test2"
44+
--- After setting test1 ---
45+
string(5) "test1"
46+
--- After resetting test1 ---
47+
string(5) "test2"
48+
--- After resetting test2 ---
49+
NULL
50+
--- After resetting test1 ---
51+
string(5) "test1"
52+
--- After calling setIdAttribute with false on test1 ---
53+
NULL

ext/dom/tests/bug79701/node.phpt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
--TEST--
2+
Bug #79701 (getElementById does not correctly work with duplicate definitions) - attribute node variation
3+
--EXTENSIONS--
4+
dom
5+
--FILE--
6+
<?php
7+
$dom = Dom\HTMLDocument::createEmpty();
8+
$element = $dom->createElement('foo');
9+
$dom->append($element);
10+
$attr = $dom->createAttribute('id');
11+
$attr->value = 'test';
12+
var_dump($dom->getElementById('test')?->nodeName);
13+
$element->setAttributeNode($attr);
14+
var_dump($dom->getElementById('test')?->nodeName);
15+
?>
16+
--EXPECT--
17+
NULL
18+
string(3) "FOO"

0 commit comments

Comments
 (0)