Skip to content

Commit 4ee09de

Browse files
committed
Initial implementation of CSS selectors
1 parent 592193b commit 4ee09de

File tree

9 files changed

+2066
-27
lines changed

9 files changed

+2066
-27
lines changed

codecov.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
ignore:
22
- "ext/dom/lexbor/lexbor" # bundled library
3+
- "!ext/dom/lexbor/lexbor/selectors-adapted"
34
- "ext/pcre/pcre2lib" # bundled library

ext/dom/config.m4

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ if test "$PHP_DOM" != "no"; then
2222
$LEXBOR_DIR/encoding/big5.c $LEXBOR_DIR/encoding/decode.c $LEXBOR_DIR/encoding/encode.c $LEXBOR_DIR/encoding/encoding.c $LEXBOR_DIR/encoding/euc_kr.c $LEXBOR_DIR/encoding/gb18030.c $LEXBOR_DIR/encoding/iso_2022_jp_katakana.c $LEXBOR_DIR/encoding/jis0208.c $LEXBOR_DIR/encoding/jis0212.c $LEXBOR_DIR/encoding/range.c $LEXBOR_DIR/encoding/res.c $LEXBOR_DIR/encoding/single.c \
2323
$LEXBOR_DIR/html/encoding.c $LEXBOR_DIR/html/interface.c $LEXBOR_DIR/html/parser.c $LEXBOR_DIR/html/token.c $LEXBOR_DIR/html/token_attr.c $LEXBOR_DIR/html/tokenizer.c $LEXBOR_DIR/html/tree.c \
2424
$LEXBOR_DIR/html/interfaces/anchor_element.c $LEXBOR_DIR/html/interfaces/area_element.c $LEXBOR_DIR/html/interfaces/audio_element.c $LEXBOR_DIR/html/interfaces/base_element.c $LEXBOR_DIR/html/interfaces/body_element.c $LEXBOR_DIR/html/interfaces/br_element.c $LEXBOR_DIR/html/interfaces/button_element.c $LEXBOR_DIR/html/interfaces/canvas_element.c $LEXBOR_DIR/html/interfaces/data_element.c $LEXBOR_DIR/html/interfaces/data_list_element.c $LEXBOR_DIR/html/interfaces/details_element.c $LEXBOR_DIR/html/interfaces/dialog_element.c $LEXBOR_DIR/html/interfaces/directory_element.c $LEXBOR_DIR/html/interfaces/div_element.c $LEXBOR_DIR/html/interfaces/d_list_element.c $LEXBOR_DIR/html/interfaces/document.c $LEXBOR_DIR/html/interfaces/element.c $LEXBOR_DIR/html/interfaces/embed_element.c $LEXBOR_DIR/html/interfaces/field_set_element.c $LEXBOR_DIR/html/interfaces/font_element.c $LEXBOR_DIR/html/interfaces/form_element.c $LEXBOR_DIR/html/interfaces/frame_element.c $LEXBOR_DIR/html/interfaces/frame_set_element.c $LEXBOR_DIR/html/interfaces/head_element.c $LEXBOR_DIR/html/interfaces/heading_element.c $LEXBOR_DIR/html/interfaces/hr_element.c $LEXBOR_DIR/html/interfaces/html_element.c $LEXBOR_DIR/html/interfaces/iframe_element.c $LEXBOR_DIR/html/interfaces/image_element.c $LEXBOR_DIR/html/interfaces/input_element.c $LEXBOR_DIR/html/interfaces/label_element.c $LEXBOR_DIR/html/interfaces/legend_element.c $LEXBOR_DIR/html/interfaces/li_element.c $LEXBOR_DIR/html/interfaces/link_element.c $LEXBOR_DIR/html/interfaces/map_element.c $LEXBOR_DIR/html/interfaces/marquee_element.c $LEXBOR_DIR/html/interfaces/media_element.c $LEXBOR_DIR/html/interfaces/menu_element.c $LEXBOR_DIR/html/interfaces/meta_element.c $LEXBOR_DIR/html/interfaces/meter_element.c $LEXBOR_DIR/html/interfaces/mod_element.c $LEXBOR_DIR/html/interfaces/object_element.c $LEXBOR_DIR/html/interfaces/o_list_element.c $LEXBOR_DIR/html/interfaces/opt_group_element.c $LEXBOR_DIR/html/interfaces/option_element.c 
$LEXBOR_DIR/html/interfaces/output_element.c $LEXBOR_DIR/html/interfaces/paragraph_element.c $LEXBOR_DIR/html/interfaces/param_element.c $LEXBOR_DIR/html/interfaces/picture_element.c $LEXBOR_DIR/html/interfaces/pre_element.c $LEXBOR_DIR/html/interfaces/progress_element.c $LEXBOR_DIR/html/interfaces/quote_element.c $LEXBOR_DIR/html/interfaces/script_element.c $LEXBOR_DIR/html/interfaces/select_element.c $LEXBOR_DIR/html/interfaces/slot_element.c $LEXBOR_DIR/html/interfaces/source_element.c $LEXBOR_DIR/html/interfaces/span_element.c $LEXBOR_DIR/html/interfaces/style_element.c $LEXBOR_DIR/html/interfaces/table_caption_element.c $LEXBOR_DIR/html/interfaces/table_cell_element.c $LEXBOR_DIR/html/interfaces/table_col_element.c $LEXBOR_DIR/html/interfaces/table_element.c $LEXBOR_DIR/html/interfaces/table_row_element.c $LEXBOR_DIR/html/interfaces/table_section_element.c $LEXBOR_DIR/html/interfaces/template_element.c $LEXBOR_DIR/html/interfaces/text_area_element.c $LEXBOR_DIR/html/interfaces/time_element.c $LEXBOR_DIR/html/interfaces/title_element.c $LEXBOR_DIR/html/interfaces/track_element.c $LEXBOR_DIR/html/interfaces/u_list_element.c $LEXBOR_DIR/html/interfaces/unknown_element.c $LEXBOR_DIR/html/interfaces/video_element.c $LEXBOR_DIR/html/interfaces/window.c \
25+
$LEXBOR_DIR/css/state.c $LEXBOR_DIR/css/log.c $LEXBOR_DIR/css/parser.c $LEXBOR_DIR/css/selectors/state.c $LEXBOR_DIR/css/selectors/selectors.c $LEXBOR_DIR/css/selectors/selector.c $LEXBOR_DIR/css/selectors/pseudo_state.c $LEXBOR_DIR/css/selectors/pseudo.c $LEXBOR_DIR/css/syntax/tokenizer/error.c $LEXBOR_DIR/css/syntax/state.c $LEXBOR_DIR/css/syntax/parser.c $LEXBOR_DIR/css/syntax/syntax.c $LEXBOR_DIR/css/syntax/anb.c $LEXBOR_DIR/css/syntax/tokenizer.c $LEXBOR_DIR/css/syntax/token.c $LEXBOR_DIR/css/css.c \
26+
$LEXBOR_DIR/selectors-adapted/selectors.c \
2527
$LEXBOR_DIR/ns/ns.c \
2628
$LEXBOR_DIR/tag/tag.c"
2729
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c \
@@ -44,7 +46,10 @@ if test "$PHP_DOM" != "no"; then
4446
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/html/tokenizer)
4547
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/html/interfaces)
4648
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/encoding)
47-
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/selectors)
49+
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/selectors)
50+
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/tokenizer)
51+
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/syntax)
52+
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/selectors-adapted)
4853
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/ns)
4954
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/tag)
5055
PHP_SUBST(DOM_SHARED_LIBADD)

ext/dom/config.w32

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ if (PHP_DOM == "yes") {
2727
ADD_SOURCES("ext/dom/lexbor/lexbor/html", "encoding.c interface.c parser.c token.c token_attr.c tokenizer.c tree.c", "dom");
2828
ADD_SOURCES("ext/dom/lexbor/lexbor/encoding", "big5.c decode.c encode.c encoding.c euc_kr.c gb18030.c iso_2022_jp_katakana.c jis0208.c jis0212.c range.c res.c single.c", "dom");
2929
ADD_SOURCES("ext/dom/lexbor/lexbor/html/interfaces", "anchor_element.c area_element.c audio_element.c base_element.c body_element.c br_element.c button_element.c canvas_element.c data_element.c data_list_element.c details_element.c dialog_element.c directory_element.c div_element.c d_list_element.c document.c element.c embed_element.c field_set_element.c font_element.c form_element.c frame_element.c frame_set_element.c head_element.c heading_element.c hr_element.c html_element.c iframe_element.c image_element.c input_element.c label_element.c legend_element.c li_element.c link_element.c map_element.c marquee_element.c media_element.c menu_element.c meta_element.c meter_element.c mod_element.c object_element.c o_list_element.c opt_group_element.c option_element.c output_element.c paragraph_element.c param_element.c picture_element.c pre_element.c progress_element.c quote_element.c script_element.c select_element.c slot_element.c source_element.c span_element.c style_element.c table_caption_element.c table_cell_element.c table_col_element.c table_element.c table_row_element.c table_section_element.c template_element.c text_area_element.c time_element.c title_element.c track_element.c u_list_element.c unknown_element.c video_element.c window.c", "dom");
30+
ADD_SOURCES("ext/dom/lexbor/lexbor/selectors-adapted", "selectors.c", "dom");
31+
ADD_SOURCES("ext/dom/lexbor/lexbor/css", "state.c log.c parser.c css.c", "dom");
32+
ADD_SOURCES("ext/dom/lexbor/lexbor/css/selectors", "state.c selectors.c selector.c pseudo_state.c pseudo.c", "dom");
33+
ADD_SOURCES("ext/dom/lexbor/lexbor/css/syntax", "state.c parser.c syntax.c anb.c tokenizer.c token.c", "dom");
34+
ADD_SOURCES("ext/dom/lexbor/lexbor/css/syntax/tokenizer", "error.c", "dom");
3035
ADD_SOURCES("ext/dom/lexbor/lexbor/ns", "ns.c", "dom");
3136
ADD_SOURCES("ext/dom/lexbor/lexbor/tag", "tag.c", "dom");
3237
ADD_FLAG("CFLAGS_DOM", "/D LEXBOR_STATIC ");

ext/dom/element.c

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
#include "php_dom.h"
2525
#include "namespace_compat.h"
2626

27+
#include "lexbor/css/parser.h"
28+
#include "lexbor/selectors-adapted/selectors.h"
29+
2730
/*
2831
* class DOMElement extends DOMNode
2932
*
@@ -1640,4 +1643,135 @@ PHP_METHOD(DOMElement, toggleAttribute)
16401643
}
16411644
/* }}} end DOMElement::toggleAttribute */
16421645

1646+
/**
1647+
* CSS selector implementation below
1648+
*/
1649+
1650+
typedef struct {
1651+
HashTable *list;
1652+
dom_object *intern;
1653+
} dom_query_selector_all_ctx;
1654+
1655+
/*
 * Selector match callback that records a single node.
 *
 * ctx must point to an xmlNodePtr; the matched node is written there.
 * Always returns LXB_STATUS_STOP. The specificity argument is not used.
 */
lxb_status_t php_dom_query_selector_find_single_callback(const xmlNode *node, lxb_css_selector_specificity_t spec, void *ctx)
{
	xmlNodePtr *match_slot = ctx;

	/* The callback receives a const node, but the slot stores a mutable pointer. */
	*match_slot = (xmlNodePtr) node;

	return LXB_STATUS_STOP;
}
1661+
1662+
lxb_status_t php_dom_query_selector_find_array_callback(const xmlNode *node, lxb_css_selector_specificity_t spec, void *ctx)
1663+
{
1664+
dom_query_selector_all_ctx *qsa_ctx = (dom_query_selector_all_ctx *) ctx;
1665+
zval object;
1666+
php_dom_create_object((xmlNodePtr) node, &object, qsa_ctx->intern);
1667+
zend_hash_next_index_insert_new(qsa_ctx->list, &object);
1668+
return LXB_STATUS_OK;
1669+
}
1670+
1671+
/*
 * Parses selectors_str as a CSS selector list and runs it against root,
 * invoking cb(node, specificity, ctx) for matches.
 *
 * If root is a document node, matching starts at its root element instead.
 *
 * Returns LXB_STATUS_OK when the selector was parsed and evaluated; this
 * includes the case where the callback ended the search early with
 * LXB_STATUS_STOP. Returns a non-OK status with a PHP exception pending when
 * the selector cannot be parsed (ValueError for argument 1) or when evaluation
 * fails unexpectedly (Error).
 *
 * return_value and intern are currently unused by this helper.
 */
static lxb_status_t php_dom_query_selector_common(zval *return_value, dom_object *intern, const xmlNode *root, zend_string *selectors_str, lxb_selectors_cb_f cb, void *ctx)
{
	lxb_status_t status;

	if (root->type == XML_DOCUMENT_NODE || root->type == XML_HTML_DOCUMENT_NODE) {
		/* This is NULL for a document that has no root element. */
		root = xmlDocGetRootElement((const xmlDoc *) root);
	}

	lxb_css_parser_t parser;
	status = lxb_css_parser_init(&parser, NULL);
	ZEND_ASSERT(status == LXB_STATUS_OK);

	lxb_selectors_t selectors;
	status = lxb_selectors_init(&selectors);
	ZEND_ASSERT(status == LXB_STATUS_OK);

	lxb_css_selector_list_t *list = lxb_css_selectors_parse(&parser, (const lxb_char_t *) ZSTR_VAL(selectors_str), ZSTR_LEN(selectors_str));
	if (UNEXPECTED(list == NULL)) {
		size_t nr_of_messages = lexbor_array_obj_length(&parser.log->messages);
		if (nr_of_messages > 0) {
			lxb_css_log_message_t *msg = lexbor_array_obj_get(&parser.log->messages, 0);
			/* The precision argument of "%.*s" must be an int, not a size_t. */
			zend_argument_value_error(1, "must be a valid selector (%.*s)", (int) msg->text.length, msg->text.data);
		} else {
			zend_argument_value_error(1, "must be a valid selector");
		}
		status = LXB_STATUS_ERROR;
	} else if (root == NULL) {
		/* Nothing to search: the selector is valid but there are no matches.
		 * Avoids handing a NULL root to the matcher. */
		status = LXB_STATUS_OK;
	} else {
		status = lxb_selectors_find(&selectors, root, list, cb, ctx);
		if (UNEXPECTED(status != LXB_STATUS_OK && status != LXB_STATUS_STOP)) {
			/* Shouldn't happen, but for safety reasons let's throw here... */
			zend_throw_error(NULL, "Unexpected failure during evaluation of selector");
		} else {
			/* An early stop requested by the callback is a success for callers,
			 * which only compare against LXB_STATUS_OK. */
			status = LXB_STATUS_OK;
		}
	}

	lxb_css_selector_list_destroy_memory(list);
	(void) lxb_selectors_destroy(&selectors);
	(void) lxb_css_parser_destroy(&parser, false);

	return status;
}
1711+
1712+
PHP_METHOD(DOMElement, querySelector)
1713+
{
1714+
zend_string *selectors_str;
1715+
1716+
ZEND_PARSE_PARAMETERS_START(1, 1)
1717+
Z_PARAM_STR(selectors_str)
1718+
ZEND_PARSE_PARAMETERS_END();
1719+
1720+
xmlNodePtr thisp;
1721+
dom_object *intern;
1722+
zval *id;
1723+
DOM_GET_THIS_OBJ(thisp, id, xmlNodePtr, intern);
1724+
1725+
xmlNodePtr result = NULL;
1726+
1727+
if (php_dom_query_selector_common(
1728+
return_value,
1729+
intern,
1730+
thisp,
1731+
selectors_str,
1732+
php_dom_query_selector_find_single_callback,
1733+
&result
1734+
) != LXB_STATUS_OK || result == NULL) {
1735+
RETURN_NULL();
1736+
} else {
1737+
int ret;
1738+
DOM_RET_OBJ(result, &ret, intern);
1739+
}
1740+
}
1741+
1742+
PHP_METHOD(DOMElement, querySelectorAll)
1743+
{
1744+
zend_string *selectors_str;
1745+
1746+
ZEND_PARSE_PARAMETERS_START(1, 1)
1747+
Z_PARAM_STR(selectors_str)
1748+
ZEND_PARSE_PARAMETERS_END();
1749+
1750+
xmlNodePtr thisp;
1751+
dom_object *intern;
1752+
zval *id;
1753+
DOM_GET_THIS_OBJ(thisp, id, xmlNodePtr, intern);
1754+
1755+
HashTable *list = zend_new_array(0);
1756+
dom_query_selector_all_ctx ctx = { list, intern };
1757+
1758+
if (php_dom_query_selector_common(
1759+
return_value,
1760+
intern,
1761+
thisp,
1762+
selectors_str,
1763+
php_dom_query_selector_find_array_callback,
1764+
&ctx
1765+
) != LXB_STATUS_OK) {
1766+
zend_array_destroy(list);
1767+
RETURN_THROWS();
1768+
} else {
1769+
php_dom_create_iterator(return_value, DOM_NODELIST);
1770+
dom_object *ret_obj = Z_DOMOBJ_P(return_value);
1771+
dom_nnodemap_object *mapptr = (dom_nnodemap_object *) ret_obj->ptr;
1772+
ZVAL_ARR(&mapptr->baseobj_zv, list);
1773+
mapptr->nodetype = DOM_NODESET;
1774+
}
1775+
}
1776+
16431777
#endif

0 commit comments

Comments
 (0)