Skip to content

Commit a3d38bb

Browse files
committed
Add PhpToken methods
1 parent fac1a2d commit a3d38bb

File tree

3 files changed

+150
-26
lines changed

3 files changed

+150
-26
lines changed

ext/tokenizer/tokenizer.c

Lines changed: 131 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,126 @@ zend_module_entry tokenizer_module_entry = {
7676
ZEND_GET_MODULE(tokenizer)
7777
#endif
7878

79+
/* Fetch property slot 0 of a PhpToken object, reported as PhpToken::$id.
 * Returns the dereferenced zval, asserted to hold an integer. If the slot is
 * still uninitialized, an Error is thrown and NULL is returned instead. */
static zval *php_token_get_id(zval *obj) {
	zval *slot = OBJ_PROP_NUM(Z_OBJ_P(obj), 0);

	if (!Z_ISUNDEF_P(slot)) {
		/* The slot may hold a reference; callers want the underlying value. */
		ZVAL_DEREF(slot);
		ZEND_ASSERT(Z_TYPE_P(slot) == IS_LONG);
		return slot;
	}

	zend_throw_error(NULL,
		"Typed property PhpToken::$id must not be accessed before initialization");
	return NULL;
}
91+
92+
/* Fetch property slot 1 of a PhpToken object, reported as PhpToken::$text.
 * Returns the dereferenced zval, asserted to hold a string. If the slot is
 * still uninitialized, an Error is thrown and NULL is returned instead. */
static zval *php_token_get_text(zval *obj) {
	zval *slot = OBJ_PROP_NUM(Z_OBJ_P(obj), 1);

	if (!Z_ISUNDEF_P(slot)) {
		/* The slot may hold a reference; callers want the underlying value. */
		ZVAL_DEREF(slot);
		ZEND_ASSERT(Z_TYPE_P(slot) == IS_STRING);
		return slot;
	}

	zend_throw_error(NULL,
		"Typed property PhpToken::$text must not be accessed before initialization");
	return NULL;
}
104+
105+
PHP_METHOD(PhpToken, is)
106+
{
107+
zval *kind;
108+
109+
ZEND_PARSE_PARAMETERS_START(1, 1)
110+
Z_PARAM_ZVAL(kind)
111+
ZEND_PARSE_PARAMETERS_END();
112+
113+
if (Z_TYPE_P(kind) == IS_LONG) {
114+
zval *id_zval = php_token_get_id(ZEND_THIS);
115+
if (!id_zval) {
116+
RETURN_THROWS();
117+
}
118+
119+
RETURN_BOOL(Z_LVAL_P(id_zval) == Z_LVAL_P(kind));
120+
} else if (Z_TYPE_P(kind) == IS_STRING) {
121+
zval *text_zval = php_token_get_text(ZEND_THIS);
122+
if (!text_zval) {
123+
RETURN_THROWS();
124+
}
125+
126+
RETURN_BOOL(zend_string_equals(Z_STR_P(text_zval), Z_STR_P(kind)));
127+
} else if (Z_TYPE_P(kind) == IS_ARRAY) {
128+
zval *id_zval = NULL, *text_zval = NULL, *entry;
129+
ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(kind), entry) {
130+
ZVAL_DEREF(entry);
131+
if (Z_TYPE_P(entry) == IS_LONG) {
132+
if (!id_zval) {
133+
id_zval = php_token_get_id(ZEND_THIS);
134+
if (!id_zval) {
135+
RETURN_THROWS();
136+
}
137+
}
138+
if (Z_LVAL_P(id_zval) == Z_LVAL_P(entry)) {
139+
RETURN_TRUE;
140+
}
141+
} else if (Z_TYPE_P(entry) == IS_STRING) {
142+
if (!text_zval) {
143+
text_zval = php_token_get_text(ZEND_THIS);
144+
if (!text_zval) {
145+
RETURN_THROWS();
146+
}
147+
}
148+
if (zend_string_equals(Z_STR_P(text_zval), Z_STR_P(entry))) {
149+
RETURN_TRUE;
150+
}
151+
} else {
152+
zend_type_error("Kind array must have elements of type int or string");
153+
RETURN_THROWS();
154+
}
155+
} ZEND_HASH_FOREACH_END();
156+
RETURN_FALSE;
157+
} else {
158+
zend_type_error("Kind must be of type int, string or array");
159+
RETURN_THROWS();
160+
}
161+
}
162+
163+
PHP_METHOD(PhpToken, isIgnorable)
164+
{
165+
ZEND_PARSE_PARAMETERS_NONE();
166+
167+
zval *id_zval = php_token_get_id(ZEND_THIS);
168+
if (!id_zval) {
169+
RETURN_THROWS();
170+
}
171+
172+
zend_long id = Z_LVAL_P(id_zval);
173+
RETURN_BOOL(id == T_WHITESPACE || id == T_COMMENT || id == T_DOC_COMMENT || id == T_OPEN_TAG);
174+
}
175+
176+
PHP_METHOD(PhpToken, getTokenName)
177+
{
178+
ZEND_PARSE_PARAMETERS_NONE();
179+
180+
zval *id_zval = php_token_get_id(ZEND_THIS);
181+
if (!id_zval) {
182+
RETURN_THROWS();
183+
}
184+
185+
if (Z_LVAL_P(id_zval) < 256) {
186+
RETURN_INTERNED_STR(ZSTR_CHAR(Z_LVAL_P(id_zval)));
187+
} else {
188+
RETURN_STRING(get_token_type_name(Z_LVAL_P(id_zval)));
189+
}
190+
}
191+
192+
/* Method table for the PhpToken class: is(), isIgnorable() and getTokenName(),
 * all registered as public. PHP_FE_END terminates the list. */
static const zend_function_entry php_token_methods[] = {
193+
PHP_ME(PhpToken, is, arginfo_class_PhpToken_is, ZEND_ACC_PUBLIC)
194+
PHP_ME(PhpToken, isIgnorable, arginfo_class_PhpToken_isIgnorable, ZEND_ACC_PUBLIC)
195+
PHP_ME(PhpToken, getTokenName, arginfo_class_PhpToken_getTokenName, ZEND_ACC_PUBLIC)
196+
PHP_FE_END
197+
};
198+
79199
/* {{{ PHP_MINIT_FUNCTION
80200
*/
81201
PHP_MINIT_FUNCTION(tokenizer)
@@ -88,7 +208,7 @@ PHP_MINIT_FUNCTION(tokenizer)
88208
tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
89209
tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS_PASSTHRU);
90210

91-
INIT_CLASS_ENTRY(ce, "PhpToken", NULL);
211+
INIT_CLASS_ENTRY(ce, "PhpToken", php_token_methods);
92212
php_token_ce = zend_register_internal_class(&ce);
93213

94214
name = zend_string_init("id", sizeof("id") - 1, 1);
@@ -125,40 +245,31 @@ PHP_MINFO_FUNCTION(tokenizer)
125245
}
126246
/* }}} */
127247

128-
static zend_string *make_str(unsigned char *text, size_t leng, HashTable *interned_strings) {
248+
static inline zend_string *make_str(unsigned char *text, size_t leng) {
129249
if (leng == 1) {
130250
return ZSTR_CHAR(text[0]);
131-
} else if (interned_strings) {
132-
zend_string *interned_str = zend_hash_str_find_ptr(interned_strings, (char *) text, leng);
133-
if (interned_str) {
134-
return zend_string_copy(interned_str);
135-
}
136-
interned_str = zend_string_init((char *) text, leng, 0);
137-
zend_hash_add_new_ptr(interned_strings, interned_str, interned_str);
138-
return interned_str;
139251
} else {
140252
return zend_string_init((char *) text, leng, 0);
141253
}
142254
}
143255

144-
static void add_token(
145-
zval *return_value, int token_type, unsigned char *text, size_t leng, int lineno,
146-
zend_bool as_object, HashTable *interned_strings) {
256+
static void add_token(zval *return_value, int token_type,
257+
unsigned char *text, size_t leng, int lineno, zend_bool as_object) {
147258
zval token;
148259
if (as_object) {
149260
zend_object *obj = zend_objects_new(php_token_ce);
150261
ZVAL_OBJ(&token, obj);
151262
ZVAL_LONG(OBJ_PROP_NUM(obj, 0), token_type);
152-
ZVAL_STR(OBJ_PROP_NUM(obj, 1), make_str(text, leng, interned_strings));
263+
ZVAL_STR(OBJ_PROP_NUM(obj, 1), make_str(text, leng));
153264
ZVAL_LONG(OBJ_PROP_NUM(obj, 2), lineno);
154265
ZVAL_LONG(OBJ_PROP_NUM(obj, 3), text - LANG_SCNG(yy_start));
155266
} else if (token_type >= 256) {
156267
array_init(&token);
157268
add_next_index_long(&token, token_type);
158-
add_next_index_str(&token, make_str(text, leng, interned_strings));
269+
add_next_index_str(&token, make_str(text, leng));
159270
add_next_index_long(&token, lineno);
160271
} else {
161-
ZVAL_STR(&token, make_str(text, leng, interned_strings));
272+
ZVAL_STR(&token, make_str(text, leng));
162273
}
163274
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &token);
164275
}
@@ -171,7 +282,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
171282
int token_type;
172283
int token_line = 1;
173284
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
174-
HashTable interned_strings;
175285

176286
ZVAL_STR_COPY(&source_zval, source);
177287
zend_save_lexical_state(&original_lex_state);
@@ -182,12 +292,10 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
182292
}
183293

184294
LANG_SCNG(yy_state) = yycINITIAL;
185-
zend_hash_init(&interned_strings, 0, NULL, NULL, 0);
186295
array_init(return_value);
187296

188297
while ((token_type = lex_scan(&token, NULL))) {
189-
add_token(return_value, token_type, zendtext, zendleng, token_line, as_object,
190-
&interned_strings);
298+
add_token(return_value, token_type, zendtext, zendleng, token_line, as_object);
191299

192300
if (Z_TYPE(token) != IS_UNDEF) {
193301
zval_ptr_dtor_nogc(&token);
@@ -203,8 +311,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
203311
/* fetch the rest into a T_INLINE_HTML */
204312
if (zendcursor != zendlimit) {
205313
add_token(return_value, T_INLINE_HTML,
206-
zendcursor, zendlimit - zendcursor, token_line, as_object,
207-
&interned_strings);
314+
zendcursor, zendlimit - zendcursor, token_line, as_object);
208315
}
209316
break;
210317
}
@@ -222,7 +329,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
222329

223330
zval_ptr_dtor_str(&source_zval);
224331
zend_restore_lexical_state(&original_lex_state);
225-
zend_hash_destroy(&interned_strings);
226332

227333
return 1;
228334
}
@@ -248,7 +354,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
248354
token = T_OPEN_TAG_WITH_ECHO;
249355
}
250356
add_token(ctx->tokens, token,
251-
LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->as_object, NULL);
357+
LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->as_object);
252358
break;
253359
case ON_FEEDBACK:
254360
tokens_ht = Z_ARRVAL_P(ctx->tokens);
@@ -263,8 +369,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
263369
case ON_STOP:
264370
if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) {
265371
add_token(ctx->tokens, T_INLINE_HTML, LANG_SCNG(yy_cursor),
266-
LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor), CG(zend_lineno),
267-
ctx->as_object, NULL);
372+
LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor), CG(zend_lineno), ctx->as_object);
268373
}
269374
break;
270375
}

ext/tokenizer/tokenizer.stub.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,12 @@
33
function token_get_all(string $source, int $flags = 0): array|false {}
44

55
function token_name(int $token): string {}
6+
7+
class PhpToken {
    /**
     * Tests whether the token matches the given kind: an int is compared
     * against the token id, a string against the token text, and an array
     * matches if any of its int/string elements does.
     *
     * @param int|string|array $kind
     */
    public function is($kind): bool {}

    /**
     * Tells whether the token is whitespace, a comment, a doc comment or an
     * open tag.
     */
    public function isIgnorable(): bool {}

    /**
     * Returns the name of the token: the character itself for single-char
     * tokens (id below 256), otherwise the token type name.
     */
    public function getTokenName(): string {}
}

ext/tokenizer/tokenizer_arginfo.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,13 @@ ZEND_END_ARG_INFO()
88
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_token_name, 0, 1, IS_STRING, 0)
99
ZEND_ARG_TYPE_INFO(0, token, IS_LONG, 0)
1010
ZEND_END_ARG_INFO()
11+
12+
/* PhpToken::is($kind): bool -- one required, untyped argument. */
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_PhpToken_is, 0, 1, _IS_BOOL, 0)
13+
ZEND_ARG_INFO(0, kind)
14+
ZEND_END_ARG_INFO()
15+
16+
/* PhpToken::isIgnorable(): bool -- takes no arguments. */
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_PhpToken_isIgnorable, 0, 0, _IS_BOOL, 0)
17+
ZEND_END_ARG_INFO()
18+
19+
/* PhpToken::getTokenName(): string -- takes no arguments. */
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_PhpToken_getTokenName, 0, 0, IS_STRING, 0)
20+
ZEND_END_ARG_INFO()

0 commit comments

Comments
 (0)