Skip to content

Commit 4676686

Browse files
committed
Implement quirks mode
1 parent b693514 commit 4676686

File tree

12 files changed

+114
-22
lines changed

12 files changed

+114
-22
lines changed

UPGRADING.INTERNALS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ PHP 8.4 INTERNALS UPGRADE NOTES
195195
- Removed the "properties" HashTable field from php_libxml_node_object.
196196
- Added a way to attach private data to a php_libxml_ref_obj.
197197
- Added a way to fix a class type onto php_libxml_ref_obj.
198+
- Added a way to record quirks mode in php_libxml_ref_obj.
198199
- Added php_libxml_uses_internal_errors().
199200

200201
e. ext/date

ext/dom/element.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1757,7 +1757,7 @@ PHP_METHOD(DOM_Element, matches)
17571757
zval *id;
17581758
DOM_GET_THIS_OBJ(thisp, id, xmlNodePtr, intern);
17591759

1760-
dom_element_matches(thisp, return_value, selectors_str);
1760+
dom_element_matches(thisp, intern, return_value, selectors_str);
17611761
}
17621762

17631763
PHP_METHOD(DOM_Element, closest)

ext/dom/html5_parser.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxm
380380
observations->has_explicit_html_tag = tree->has_explicit_html_tag;
381381
observations->has_explicit_head_tag = tree->has_explicit_head_tag;
382382
observations->has_explicit_body_tag = tree->has_explicit_body_tag;
383+
observations->quirks_mode = lxb_dom_interface_document(tree->document)->compat_mode == LXB_DOM_DOCUMENT_CMODE_QUIRKS;
383384
}
384385

385386
#endif /* HAVE_LIBXML && HAVE_DOM */

ext/dom/html5_parser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ typedef struct _lexbor_libxml2_bridge_extracted_observations {
4747
bool has_explicit_html_tag;
4848
bool has_explicit_head_tag;
4949
bool has_explicit_body_tag;
50+
bool quirks_mode;
5051
} lexbor_libxml2_bridge_extracted_observations;
5152

5253
typedef struct _lexbor_libxml2_bridge_parse_context {

ext/dom/html_document.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
917917
NULL
918918
);
919919
intern->document->class_type = PHP_LIBXML_CLASS_MODERN;
920+
intern->document->quirks_mode = ctx.observations.quirks_mode;
920921
intern->document->private_data = php_dom_libxml_ns_mapper_header(ns_mapper);
921922
return;
922923

@@ -1137,6 +1138,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
11371138
NULL
11381139
);
11391140
intern->document->class_type = PHP_LIBXML_CLASS_MODERN;
1141+
intern->document->quirks_mode = ctx.observations.quirks_mode;
11401142
intern->document->private_data = php_dom_libxml_ns_mapper_header(ns_mapper);
11411143
return;
11421144

ext/dom/lexbor/lexbor/selectors-adapted/selectors.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ lxb_selectors_match_element(const lxb_css_selector_t *selector,
159159
const xmlNode *node, lxb_selectors_entry_t *entry);
160160

161161
static bool
162-
lxb_selectors_match_id(const lxb_css_selector_t *selector, const xmlNode *node);
162+
lxb_selectors_match_id(const lxb_css_selector_t *selector, const xmlNode *node, bool quirks);
163163

164164
static bool
165165
lxb_selectors_match_class(const lexbor_str_t *target, const lexbor_str_t *src,
@@ -1161,7 +1161,7 @@ lxb_selectors_match(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
11611161
return lxb_selectors_match_element(selector, node, entry);
11621162

11631163
case LXB_CSS_SELECTOR_TYPE_ID:
1164-
return lxb_selectors_match_id(selector, node);
1164+
return lxb_selectors_match_id(selector, node, selectors->options & LXB_SELECTORS_OPT_QUIRKS_MODE);
11651165

11661166
case LXB_CSS_SELECTOR_TYPE_CLASS: {
11671167
const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "class");
@@ -1176,7 +1176,7 @@ lxb_selectors_match(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
11761176
}
11771177

11781178
return lxb_selectors_match_class(&trg,
1179-
&selector->name, true);
1179+
&selector->name, selectors->options & LXB_SELECTORS_OPT_QUIRKS_MODE);
11801180
}
11811181

11821182
case LXB_CSS_SELECTOR_TYPE_ATTRIBUTE:
@@ -1210,7 +1210,7 @@ lxb_selectors_match_element(const lxb_css_selector_t *selector,
12101210
}
12111211

12121212
static bool
1213-
lxb_selectors_match_id(const lxb_css_selector_t *selector, const xmlNode *node)
1213+
lxb_selectors_match_id(const lxb_css_selector_t *selector, const xmlNode *node, bool quirks)
12141214
{
12151215
const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "id");
12161216
if (dom_attr == NULL) {
@@ -1219,10 +1219,12 @@ lxb_selectors_match_id(const lxb_css_selector_t *selector, const xmlNode *node)
12191219

12201220
const lexbor_str_t *src = &selector->name;
12211221
lexbor_str_t trg = lxb_selectors_adapted_attr_value_empty(dom_attr);
1222-
if (trg.length == src->length
1223-
&& lexbor_str_data_ncasecmp(trg.data, src->data, src->length))
1224-
{
1225-
return true;
1222+
if (trg.length == src->length) {
1223+
if (quirks) {
1224+
return lexbor_str_data_ncasecmp(trg.data, src->data, src->length);
1225+
} else {
1226+
return lexbor_str_data_ncmp(trg.data, src->data, src->length);
1227+
}
12261228
}
12271229

12281230
return false;

ext/dom/lexbor/lexbor/selectors-adapted/selectors.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,10 @@ typedef enum {
5656
* any of the selectors. That is, the callback will be called only once
5757
* for example above. This way we get rid of duplicates in the search.
5858
*/
59-
LXB_SELECTORS_OPT_MATCH_FIRST = 1 << 2
59+
LXB_SELECTORS_OPT_MATCH_FIRST = 1 << 2,
60+
61+
/* Quirks mode: ID and class selectors are matched case-insensitively */
62+
LXB_SELECTORS_OPT_QUIRKS_MODE = 1 << 3,
6063
}
6164
lxb_selectors_opt_t;
6265

ext/dom/parentnode/css_selectors.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@ typedef struct {
3737
bool result;
3838
} dom_query_selector_matches_ctx;
3939

40+
static lxb_selectors_opt_t dom_quirks_opt(lxb_selectors_opt_t options, const dom_object *intern)
41+
{
42+
if (intern->document != NULL && intern->document->quirks_mode) {
43+
options |= LXB_SELECTORS_OPT_QUIRKS_MODE;
44+
}
45+
return options;
46+
}
47+
4048
lxb_status_t dom_query_selector_find_single_callback(const xmlNode *node, lxb_css_selector_specificity_t spec, void *ctx)
4149
{
4250
xmlNodePtr *result = (xmlNodePtr *) ctx;
@@ -67,7 +75,8 @@ static lxb_css_selector_list_t *dom_parse_selector(
6775
lxb_css_parser_t *parser,
6876
lxb_selectors_t *selectors,
6977
const zend_string *selectors_str,
70-
lxb_selectors_opt_t options
78+
lxb_selectors_opt_t options,
79+
const dom_object *intern
7180
)
7281
{
7382
lxb_status_t status;
@@ -79,7 +88,7 @@ static lxb_css_selector_list_t *dom_parse_selector(
7988
memset(selectors, 0, sizeof(lxb_selectors_t));
8089
status = lxb_selectors_init(selectors);
8190
ZEND_ASSERT(status == LXB_STATUS_OK);
82-
lxb_selectors_opt_set(selectors, options);
91+
lxb_selectors_opt_set(selectors, dom_quirks_opt(options, intern));
8392

8493
lxb_css_selector_list_t *list = lxb_css_selectors_parse(parser, (const lxb_char_t *) ZSTR_VAL(selectors_str), ZSTR_LEN(selectors_str));
8594
if (UNEXPECTED(list == NULL)) {
@@ -115,8 +124,8 @@ static void dom_selector_cleanup(lxb_css_parser_t *parser, lxb_selectors_t *sele
115124
}
116125

117126
static lxb_status_t dom_query_selector_common(
118-
zval *return_value,
119127
const xmlNode *root,
128+
const dom_object *intern,
120129
const zend_string *selectors_str,
121130
lxb_selectors_cb_f cb,
122131
void *ctx,
@@ -128,7 +137,7 @@ static lxb_status_t dom_query_selector_common(
128137
lxb_css_parser_t parser;
129138
lxb_selectors_t selectors;
130139

131-
lxb_css_selector_list_t *list = dom_parse_selector(&parser, &selectors, selectors_str, options);
140+
lxb_css_selector_list_t *list = dom_parse_selector(&parser, &selectors, selectors_str, options, intern);
132141
if (UNEXPECTED(list == NULL)) {
133142
status = LXB_STATUS_ERROR;
134143
} else {
@@ -143,6 +152,7 @@ static lxb_status_t dom_query_selector_common(
143152

144153
static lxb_status_t dom_query_matches(
145154
const xmlNode *root,
155+
const dom_object *intern,
146156
const zend_string *selectors_str,
147157
void *ctx
148158
)
@@ -152,7 +162,7 @@ static lxb_status_t dom_query_matches(
152162
lxb_css_parser_t parser;
153163
lxb_selectors_t selectors;
154164

155-
lxb_css_selector_list_t *list = dom_parse_selector(&parser, &selectors, selectors_str, LXB_SELECTORS_OPT_MATCH_FIRST);
165+
lxb_css_selector_list_t *list = dom_parse_selector(&parser, &selectors, selectors_str, LXB_SELECTORS_OPT_MATCH_FIRST, intern);
156166
if (UNEXPECTED(list == NULL)) {
157167
status = LXB_STATUS_ERROR;
158168
} else {
@@ -167,6 +177,7 @@ static lxb_status_t dom_query_matches(
167177

168178
static const xmlNode *dom_query_closest(
169179
const xmlNode *root,
180+
const dom_object *intern,
170181
const zend_string *selectors_str
171182
)
172183
{
@@ -175,7 +186,7 @@ static const xmlNode *dom_query_closest(
175186
lxb_css_parser_t parser;
176187
lxb_selectors_t selectors;
177188

178-
lxb_css_selector_list_t *list = dom_parse_selector(&parser, &selectors, selectors_str, LXB_SELECTORS_OPT_MATCH_FIRST);
189+
lxb_css_selector_list_t *list = dom_parse_selector(&parser, &selectors, selectors_str, LXB_SELECTORS_OPT_MATCH_FIRST, intern);
179190
if (EXPECTED(list != NULL)) {
180191
const xmlNode *current = root;
181192
while (current != NULL) {
@@ -204,8 +215,8 @@ void dom_parent_node_query_selector(xmlNodePtr thisp, dom_object *intern, zval *
204215
xmlNodePtr result = NULL;
205216

206217
if (dom_query_selector_common(
207-
return_value,
208218
thisp,
219+
intern,
209220
selectors_str,
210221
dom_query_selector_find_single_callback,
211222
&result,
@@ -224,8 +235,8 @@ void dom_parent_node_query_selector_all(xmlNodePtr thisp, dom_object *intern, zv
224235
dom_query_selector_all_ctx ctx = { list, intern };
225236

226237
if (dom_query_selector_common(
227-
return_value,
228238
thisp,
239+
intern,
229240
selectors_str,
230241
dom_query_selector_find_array_callback,
231242
&ctx,
@@ -243,12 +254,13 @@ void dom_parent_node_query_selector_all(xmlNodePtr thisp, dom_object *intern, zv
243254
}
244255

245256
/* https://dom.spec.whatwg.org/#dom-element-matches */
246-
void dom_element_matches(xmlNodePtr thisp, zval *return_value, const zend_string *selectors_str)
257+
void dom_element_matches(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str)
247258
{
248259
dom_query_selector_matches_ctx ctx = { thisp, false };
249260

250261
if (dom_query_matches(
251262
thisp,
263+
intern,
252264
selectors_str,
253265
&ctx
254266
) != LXB_STATUS_OK) {
@@ -261,7 +273,7 @@ void dom_element_matches(xmlNodePtr thisp, zval *return_value, const zend_string
261273
/* https://dom.spec.whatwg.org/#dom-element-closest */
262274
void dom_element_closest(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str)
263275
{
264-
const xmlNode *result = dom_query_closest(thisp, selectors_str);
276+
const xmlNode *result = dom_query_closest(thisp, intern, selectors_str);
265277
if (EXPECTED(result != NULL)) {
266278
DOM_RET_OBJ((xmlNodePtr) result, intern);
267279
}

ext/dom/php_dom.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ bool php_dom_pre_insert(php_libxml_ref_obj *document, xmlNodePtr node, xmlNodePt
198198
bool php_dom_pre_insert_is_parent_invalid(xmlNodePtr parent);
199199
void dom_parent_node_query_selector(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str);
200200
void dom_parent_node_query_selector_all(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str);
201-
void dom_element_matches(xmlNodePtr thisp, zval *return_value, const zend_string *selectors_str);
201+
void dom_element_matches(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str);
202202
void dom_element_closest(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str);
203203

204204
/* nodemap and nodelist APIs */
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
--TEST--
2+
CSS Selectors - Quirks mode test
3+
--EXTENSIONS--
4+
dom
5+
--FILE--
6+
<?php
7+
8+
require __DIR__ . '/test_utils.inc';
9+
10+
echo "\n=== Document in quirks mode ===\n\n";
11+
12+
$dom = DOM\HTMLDocument::createFromString(<<<HTML
13+
<html>
14+
<div class="HElLoWorLD"/>
15+
<div id="hI"/>
16+
</html>
17+
HTML, LIBXML_NOERROR);
18+
19+
test_helper($dom, 'div.helloworld');
20+
test_helper($dom, 'div.HElLoWorLD');
21+
test_helper($dom, '#hi');
22+
test_helper($dom, '#hI');
23+
24+
echo "\n=== Document not in quirks mode ===\n\n";
25+
26+
$dom = DOM\HTMLDocument::createFromString(<<<HTML
27+
<!DOCTYPE html>
28+
<html>
29+
<div class="HElLoWorLD"/>
30+
<div id="hI"/>
31+
</html>
32+
HTML, LIBXML_NOERROR);
33+
34+
test_helper($dom, 'div.helloworld');
35+
test_helper($dom, 'div.HElLoWorLD');
36+
test_helper($dom, '#hi');
37+
test_helper($dom, '#hI');
38+
39+
?>
40+
--EXPECT--
41+
=== Document in quirks mode ===
42+
43+
--- Selector: div.helloworld ---
44+
<div xmlns="http://www.w3.org/1999/xhtml" class="HElLoWorLD">
45+
<div id="hI">
46+
</div></div>
47+
--- Selector: div.HElLoWorLD ---
48+
<div xmlns="http://www.w3.org/1999/xhtml" class="HElLoWorLD">
49+
<div id="hI">
50+
</div></div>
51+
--- Selector: #hi ---
52+
<div xmlns="http://www.w3.org/1999/xhtml" id="hI">
53+
</div>
54+
--- Selector: #hI ---
55+
<div xmlns="http://www.w3.org/1999/xhtml" id="hI">
56+
</div>
57+
58+
=== Document not in quirks mode ===
59+
60+
--- Selector: div.helloworld ---
61+
--- Selector: div.HElLoWorLD ---
62+
<div xmlns="http://www.w3.org/1999/xhtml" class="HElLoWorLD">
63+
<div id="hI">
64+
</div></div>
65+
--- Selector: #hi ---
66+
--- Selector: #hI ---
67+
<div xmlns="http://www.w3.org/1999/xhtml" id="hI">
68+
</div>

ext/libxml/libxml.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,6 +1355,7 @@ PHP_LIBXML_API int php_libxml_increment_doc_ref(php_libxml_node_object *object,
13551355
object->document->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */
13561356
object->document->private_data = NULL;
13571357
object->document->class_type = PHP_LIBXML_CLASS_UNSET;
1358+
object->document->quirks_mode = false;
13581359
}
13591360

13601361
return ret_refcount;

ext/libxml/php_libxml.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ typedef struct _php_libxml_ref_obj {
8989
php_libxml_cache_tag cache_tag;
9090
php_libxml_private_data_header *private_data;
9191
int refcount;
92-
php_libxml_class_type class_type;
92+
php_libxml_class_type class_type : 8;
93+
bool quirks_mode;
9394
} php_libxml_ref_obj;
9495

9596
typedef struct _php_libxml_node_ptr {

0 commit comments

Comments
 (0)