Skip to content

Commit 01b8117

Browse files
committed
Add PhpToken methods
1 parent dc27c8d commit 01b8117

File tree

4 files changed

+260
-26
lines changed

4 files changed

+260
-26
lines changed
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
--TEST--
2+
PhpToken instance methods
3+
--FILE--
4+
<?php
5+
6+
$code = <<<'PHP'
7+
<?php
8+
// comment
9+
/** comment */
10+
function foo() {
11+
echo "bar";
12+
}
13+
PHP;
14+
15+
// Token names and ignorability.
16+
$tokens = token_get_all($code, TOKEN_AS_OBJECT);
17+
foreach ($tokens as $i => $token) {
18+
printf("[%2d] %-26s %s\n", $i, $token->getTokenName(),
19+
$token->isIgnorable() ? "ignorable" : "meaningful");
20+
}
21+
22+
// is() variations
23+
24+
echo "\nSuccess:\n";
25+
var_dump($tokens[4]->is(T_FUNCTION));
26+
var_dump($tokens[4]->is('function'));
27+
var_dump($tokens[4]->is(['class', T_FUNCTION]));
28+
var_dump($tokens[4]->is([T_CLASS, 'function']));
29+
30+
echo "\nFailure:\n";
31+
var_dump($tokens[4]->is(T_CLASS));
32+
var_dump($tokens[4]->is('class'));
33+
var_dump($tokens[4]->is(['class', T_TRAIT]));
34+
var_dump($tokens[4]->is([T_CLASS, 'trait']));
35+
36+
echo "\nError:\n";
37+
try {
38+
$tokens[4]->is(3.141);
39+
} catch (TypeError $e) {
40+
echo $e->getMessage(), "\n";
41+
}
42+
try {
43+
$tokens[4]->is([3.141]);
44+
} catch (TypeError $e) {
45+
echo $e->getMessage(), "\n";
46+
}
47+
48+
// Uninitialized-property handling: after unsetting both typed properties,
// every form of is() (int, string, array of int, array of string) must throw
// an Error naming the property it needed, rather than return a result.
unset($tokens[4]->id);
49+
unset($tokens[4]->text);
50+
try {
51+
$tokens[4]->is(T_FUNCTION);
52+
} catch (Error $e) {
53+
echo $e->getMessage(), "\n";
54+
}
55+
try {
56+
$tokens[4]->is('function');
57+
} catch (Error $e) {
58+
echo $e->getMessage(), "\n";
59+
}
60+
try {
61+
$tokens[4]->is([T_FUNCTION]);
62+
} catch (Error $e) {
63+
echo $e->getMessage(), "\n";
64+
}
65+
try {
66+
$tokens[4]->is(['function']);
67+
} catch (Error $e) {
68+
echo $e->getMessage(), "\n";
69+
}
70+
71+
?>
72+
--EXPECT--
73+
[ 0] T_OPEN_TAG ignorable
74+
[ 1] T_COMMENT ignorable
75+
[ 2] T_DOC_COMMENT ignorable
76+
[ 3] T_WHITESPACE ignorable
77+
[ 4] T_FUNCTION meaningful
78+
[ 5] T_WHITESPACE ignorable
79+
[ 6] T_STRING meaningful
80+
[ 7] ( meaningful
81+
[ 8] ) meaningful
82+
[ 9] T_WHITESPACE ignorable
83+
[10] { meaningful
84+
[11] T_WHITESPACE ignorable
85+
[12] T_ECHO meaningful
86+
[13] T_WHITESPACE ignorable
87+
[14] T_CONSTANT_ENCAPSED_STRING meaningful
88+
[15] ; meaningful
89+
[16] T_WHITESPACE ignorable
90+
[17] } meaningful
91+
92+
Success:
93+
bool(true)
94+
bool(true)
95+
bool(true)
96+
bool(true)
97+
98+
Failure:
99+
bool(false)
100+
bool(false)
101+
bool(false)
102+
bool(false)
103+
104+
Error:
105+
Kind must be of type int, string or array
106+
Kind array must have elements of type int or string
107+
Typed property PhpToken::$id must not be accessed before initialization
108+
Typed property PhpToken::$text must not be accessed before initialization
109+
Typed property PhpToken::$id must not be accessed before initialization
110+
Typed property PhpToken::$text must not be accessed before initialization

ext/tokenizer/tokenizer.c

Lines changed: 131 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,126 @@ zend_module_entry tokenizer_module_entry = {
7676
ZEND_GET_MODULE(tokenizer)
7777
#endif
7878

79+
/* Fetch the PhpToken::$id property (property slot 0) of obj.
 *
 * Returns a pointer to the property zval, which is asserted to hold an
 * IS_LONG. If the slot is undefined (for example after unset($token->id)),
 * an Error is thrown and NULL is returned; callers must check for NULL. */
static zval *php_token_get_id(zval *obj) {
80+
zval *id = OBJ_PROP_NUM(Z_OBJ_P(obj), 0);
81+
if (Z_ISUNDEF_P(id)) {
82+
zend_throw_error(NULL,
83+
"Typed property PhpToken::$id must not be accessed before initialization");
84+
return NULL;
85+
}
86+
87+
/* Unwrap a reference, if the slot holds one, before checking the type. */
ZVAL_DEREF(id);
88+
ZEND_ASSERT(Z_TYPE_P(id) == IS_LONG);
89+
return id;
90+
}
91+
92+
/* Fetch the PhpToken::$text property (property slot 1) of obj.
 *
 * Returns a pointer to the property zval, which is asserted to hold an
 * IS_STRING. If the slot is undefined (for example after
 * unset($token->text)), an Error is thrown and NULL is returned; callers
 * must check for NULL. */
static zval *php_token_get_text(zval *obj) {
93+
zval *text = OBJ_PROP_NUM(Z_OBJ_P(obj), 1);
94+
if (Z_ISUNDEF_P(text)) {
95+
zend_throw_error(NULL,
96+
"Typed property PhpToken::$text must not be accessed before initialization");
97+
return NULL;
98+
}
99+
100+
/* Unwrap a reference, if the slot holds one, before checking the type. */
ZVAL_DEREF(text);
101+
ZEND_ASSERT(Z_TYPE_P(text) == IS_STRING);
102+
return text;
103+
}
104+
105+
/* PhpToken::is($kind): bool
 *
 * Tests whether this token matches $kind:
 *  - int:    compared against the token's $id;
 *  - string: compared against the token's $text;
 *  - array:  true as soon as any element matches, each element being
 *            compared by the int/string rule above; false if none match.
 *
 * Throws a TypeError for any other $kind type or array element type, and an
 * Error if a property needed for the comparison is uninitialized. */
PHP_METHOD(PhpToken, is)
106+
{
107+
zval *kind;
108+
109+
ZEND_PARSE_PARAMETERS_START(1, 1)
110+
Z_PARAM_ZVAL(kind)
111+
ZEND_PARSE_PARAMETERS_END();
112+
113+
if (Z_TYPE_P(kind) == IS_LONG) {
114+
zval *id_zval = php_token_get_id(ZEND_THIS);
115+
if (!id_zval) {
116+
RETURN_THROWS();
117+
}
118+
119+
RETURN_BOOL(Z_LVAL_P(id_zval) == Z_LVAL_P(kind));
120+
} else if (Z_TYPE_P(kind) == IS_STRING) {
121+
zval *text_zval = php_token_get_text(ZEND_THIS);
122+
if (!text_zval) {
123+
RETURN_THROWS();
124+
}
125+
126+
RETURN_BOOL(zend_string_equals(Z_STR_P(text_zval), Z_STR_P(kind)));
127+
} else if (Z_TYPE_P(kind) == IS_ARRAY) {
128+
/* Both properties are fetched lazily and cached across iterations, so a
 * property is only read (and can only throw) once an element of the
 * corresponding type is actually encountered. */
zval *id_zval = NULL, *text_zval = NULL, *entry;
129+
ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(kind), entry) {
130+
ZVAL_DEREF(entry);
131+
if (Z_TYPE_P(entry) == IS_LONG) {
132+
if (!id_zval) {
133+
id_zval = php_token_get_id(ZEND_THIS);
134+
if (!id_zval) {
135+
RETURN_THROWS();
136+
}
137+
}
138+
if (Z_LVAL_P(id_zval) == Z_LVAL_P(entry)) {
139+
RETURN_TRUE;
140+
}
141+
} else if (Z_TYPE_P(entry) == IS_STRING) {
142+
if (!text_zval) {
143+
text_zval = php_token_get_text(ZEND_THIS);
144+
if (!text_zval) {
145+
RETURN_THROWS();
146+
}
147+
}
148+
if (zend_string_equals(Z_STR_P(text_zval), Z_STR_P(entry))) {
149+
RETURN_TRUE;
150+
}
151+
} else {
152+
/* An invalid element is only reported if it is reached, i.e. if no
 * earlier element already matched. */
zend_type_error("Kind array must have elements of type int or string");
153+
RETURN_THROWS();
154+
}
155+
} ZEND_HASH_FOREACH_END();
156+
RETURN_FALSE;
157+
} else {
158+
zend_type_error("Kind must be of type int, string or array");
159+
RETURN_THROWS();
160+
}
161+
}
162+
163+
/* PhpToken::isIgnorable(): bool
 *
 * Returns true when the token's $id is T_WHITESPACE, T_COMMENT,
 * T_DOC_COMMENT or T_OPEN_TAG, and false for every other id.
 * Throws an Error if $id is uninitialized. */
PHP_METHOD(PhpToken, isIgnorable)
164+
{
165+
ZEND_PARSE_PARAMETERS_NONE();
166+
167+
zval *id_zval = php_token_get_id(ZEND_THIS);
168+
if (!id_zval) {
169+
RETURN_THROWS();
170+
}
171+
172+
zend_long id = Z_LVAL_P(id_zval);
173+
RETURN_BOOL(id == T_WHITESPACE || id == T_COMMENT || id == T_DOC_COMMENT || id == T_OPEN_TAG);
174+
}
175+
176+
/* PhpToken::getTokenName(): string
 *
 * For ids below 256 the token is a single character and that character is
 * returned as a one-byte string; otherwise the name reported by
 * get_token_type_name() for the id is returned.
 * Throws an Error if $id is uninitialized. */
PHP_METHOD(PhpToken, getTokenName)
177+
{
178+
ZEND_PARSE_PARAMETERS_NONE();
179+
180+
zval *id_zval = php_token_get_id(ZEND_THIS);
181+
if (!id_zval) {
182+
RETURN_THROWS();
183+
}
184+
185+
if (Z_LVAL_P(id_zval) < 256) {
186+
/* $id is a public property, so userland may have stored a negative value.
 * Narrow to unsigned char so the one-character string lookup can never be
 * indexed outside 0..255. */
RETURN_INTERNED_STR(ZSTR_CHAR((unsigned char) Z_LVAL_P(id_zval)));
187+
} else {
188+
RETURN_STRING(get_token_type_name(Z_LVAL_P(id_zval)));
189+
}
190+
}
191+
192+
/* Method table for the PhpToken class: the three public instance methods
 * implemented above, each paired with its arginfo, terminated by PHP_FE_END. */
static const zend_function_entry php_token_methods[] = {
193+
PHP_ME(PhpToken, is, arginfo_class_PhpToken_is, ZEND_ACC_PUBLIC)
194+
PHP_ME(PhpToken, isIgnorable, arginfo_class_PhpToken_isIgnorable, ZEND_ACC_PUBLIC)
195+
PHP_ME(PhpToken, getTokenName, arginfo_class_PhpToken_getTokenName, ZEND_ACC_PUBLIC)
196+
PHP_FE_END
197+
};
198+
79199
/* {{{ PHP_MINIT_FUNCTION
80200
*/
81201
PHP_MINIT_FUNCTION(tokenizer)
@@ -88,7 +208,7 @@ PHP_MINIT_FUNCTION(tokenizer)
88208
tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
89209
tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS_PASSTHRU);
90210

91-
INIT_CLASS_ENTRY(ce, "PhpToken", NULL);
211+
INIT_CLASS_ENTRY(ce, "PhpToken", php_token_methods);
92212
php_token_ce = zend_register_internal_class(&ce);
93213

94214
name = zend_string_init("id", sizeof("id") - 1, 1);
@@ -125,40 +245,31 @@ PHP_MINFO_FUNCTION(tokenizer)
125245
}
126246
/* }}} */
127247

128-
static zend_string *make_str(unsigned char *text, size_t leng, HashTable *interned_strings) {
248+
static inline zend_string *make_str(unsigned char *text, size_t leng) {
129249
if (leng == 1) {
130250
return ZSTR_CHAR(text[0]);
131-
} else if (interned_strings) {
132-
zend_string *interned_str = zend_hash_str_find_ptr(interned_strings, (char *) text, leng);
133-
if (interned_str) {
134-
return zend_string_copy(interned_str);
135-
}
136-
interned_str = zend_string_init((char *) text, leng, 0);
137-
zend_hash_add_new_ptr(interned_strings, interned_str, interned_str);
138-
return interned_str;
139251
} else {
140252
return zend_string_init((char *) text, leng, 0);
141253
}
142254
}
143255

144-
static void add_token(
145-
zval *return_value, int token_type, unsigned char *text, size_t leng, int lineno,
146-
zend_bool as_object, HashTable *interned_strings) {
256+
static void add_token(zval *return_value, int token_type,
257+
unsigned char *text, size_t leng, int lineno, zend_bool as_object) {
147258
zval token;
148259
if (as_object) {
149260
zend_object *obj = zend_objects_new(php_token_ce);
150261
ZVAL_OBJ(&token, obj);
151262
ZVAL_LONG(OBJ_PROP_NUM(obj, 0), token_type);
152-
ZVAL_STR(OBJ_PROP_NUM(obj, 1), make_str(text, leng, interned_strings));
263+
ZVAL_STR(OBJ_PROP_NUM(obj, 1), make_str(text, leng));
153264
ZVAL_LONG(OBJ_PROP_NUM(obj, 2), lineno);
154265
ZVAL_LONG(OBJ_PROP_NUM(obj, 3), text - LANG_SCNG(yy_start));
155266
} else if (token_type >= 256) {
156267
array_init(&token);
157268
add_next_index_long(&token, token_type);
158-
add_next_index_str(&token, make_str(text, leng, interned_strings));
269+
add_next_index_str(&token, make_str(text, leng));
159270
add_next_index_long(&token, lineno);
160271
} else {
161-
ZVAL_STR(&token, make_str(text, leng, interned_strings));
272+
ZVAL_STR(&token, make_str(text, leng));
162273
}
163274
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &token);
164275
}
@@ -171,7 +282,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
171282
int token_type;
172283
int token_line = 1;
173284
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
174-
HashTable interned_strings;
175285

176286
ZVAL_STR_COPY(&source_zval, source);
177287
zend_save_lexical_state(&original_lex_state);
@@ -182,12 +292,10 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
182292
}
183293

184294
LANG_SCNG(yy_state) = yycINITIAL;
185-
zend_hash_init(&interned_strings, 0, NULL, NULL, 0);
186295
array_init(return_value);
187296

188297
while ((token_type = lex_scan(&token, NULL))) {
189-
add_token(return_value, token_type, zendtext, zendleng, token_line, as_object,
190-
&interned_strings);
298+
add_token(return_value, token_type, zendtext, zendleng, token_line, as_object);
191299

192300
if (Z_TYPE(token) != IS_UNDEF) {
193301
zval_ptr_dtor_nogc(&token);
@@ -203,8 +311,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
203311
/* fetch the rest into a T_INLINE_HTML */
204312
if (zendcursor != zendlimit) {
205313
add_token(return_value, T_INLINE_HTML,
206-
zendcursor, zendlimit - zendcursor, token_line, as_object,
207-
&interned_strings);
314+
zendcursor, zendlimit - zendcursor, token_line, as_object);
208315
}
209316
break;
210317
}
@@ -222,7 +329,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
222329

223330
zval_ptr_dtor_str(&source_zval);
224331
zend_restore_lexical_state(&original_lex_state);
225-
zend_hash_destroy(&interned_strings);
226332

227333
return 1;
228334
}
@@ -248,7 +354,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
248354
token = T_OPEN_TAG_WITH_ECHO;
249355
}
250356
add_token(ctx->tokens, token,
251-
LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->as_object, NULL);
357+
LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->as_object);
252358
break;
253359
case ON_FEEDBACK:
254360
tokens_ht = Z_ARRVAL_P(ctx->tokens);
@@ -263,8 +369,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
263369
case ON_STOP:
264370
if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) {
265371
add_token(ctx->tokens, T_INLINE_HTML, LANG_SCNG(yy_cursor),
266-
LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor), CG(zend_lineno),
267-
ctx->as_object, NULL);
372+
LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor), CG(zend_lineno), ctx->as_object);
268373
}
269374
break;
270375
}

ext/tokenizer/tokenizer.stub.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,12 @@
33
function token_get_all(string $source, int $flags = 0): array {}
44

55
function token_name(int $token): string {}
6+
7+
// Stub declaring the signatures of the PhpToken instance methods.
class PhpToken {
8+
// Whether the token matches $kind: an int is compared with the token id,
// a string with the token text, and an array matches if any element does.
/** @param int|string|array $kind */
9+
public function is($kind): bool;
10+
11+
// Whether the token is whitespace, a comment, a doc comment or an open tag.
public function isIgnorable(): bool;
12+
13+
// Name of the token: the character itself for single-character tokens,
// otherwise the token type name (e.g. "T_FUNCTION").
public function getTokenName(): string;
14+
}

ext/tokenizer/tokenizer_arginfo.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,13 @@ ZEND_END_ARG_INFO()
88
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_token_name, 0, 1, IS_STRING, 0)
99
ZEND_ARG_TYPE_INFO(0, token, IS_LONG, 0)
1010
ZEND_END_ARG_INFO()
11+
12+
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_PhpToken_is, 0, 1, _IS_BOOL, 0)
13+
ZEND_ARG_INFO(0, kind)
14+
ZEND_END_ARG_INFO()
15+
16+
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_PhpToken_isIgnorable, 0, 0, _IS_BOOL, 0)
17+
ZEND_END_ARG_INFO()
18+
19+
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_PhpToken_getTokenName, 0, 0, IS_STRING, 0)
20+
ZEND_END_ARG_INFO()

0 commit comments

Comments
 (0)