@@ -76,6 +76,126 @@ zend_module_entry tokenizer_module_entry = {
76
76
ZEND_GET_MODULE (tokenizer )
77
77
#endif
78
78
79
+ static zval * php_token_get_id (zval * obj ) {
80
+ zval * id = OBJ_PROP_NUM (Z_OBJ_P (obj ), 0 );
81
+ if (Z_ISUNDEF_P (id )) {
82
+ zend_throw_error (NULL ,
83
+ "Typed property PhpToken::$id must not be accessed before initialization" );
84
+ return NULL ;
85
+ }
86
+
87
+ ZVAL_DEREF (id );
88
+ ZEND_ASSERT (Z_TYPE_P (id ) == IS_LONG );
89
+ return id ;
90
+ }
91
+
92
+ static zval * php_token_get_text (zval * obj ) {
93
+ zval * text = OBJ_PROP_NUM (Z_OBJ_P (obj ), 1 );
94
+ if (Z_ISUNDEF_P (text )) {
95
+ zend_throw_error (NULL ,
96
+ "Typed property PhpToken::$text must not be accessed before initialization" );
97
+ return NULL ;
98
+ }
99
+
100
+ ZVAL_DEREF (text );
101
+ ZEND_ASSERT (Z_TYPE_P (text ) == IS_STRING );
102
+ return text ;
103
+ }
104
+
105
+ PHP_METHOD (PhpToken , is )
106
+ {
107
+ zval * kind ;
108
+
109
+ ZEND_PARSE_PARAMETERS_START (1 , 1 )
110
+ Z_PARAM_ZVAL (kind )
111
+ ZEND_PARSE_PARAMETERS_END ();
112
+
113
+ if (Z_TYPE_P (kind ) == IS_LONG ) {
114
+ zval * id_zval = php_token_get_id (ZEND_THIS );
115
+ if (!id_zval ) {
116
+ RETURN_THROWS ();
117
+ }
118
+
119
+ RETURN_BOOL (Z_LVAL_P (id_zval ) == Z_LVAL_P (kind ));
120
+ } else if (Z_TYPE_P (kind ) == IS_STRING ) {
121
+ zval * text_zval = php_token_get_text (ZEND_THIS );
122
+ if (!text_zval ) {
123
+ RETURN_THROWS ();
124
+ }
125
+
126
+ RETURN_BOOL (zend_string_equals (Z_STR_P (text_zval ), Z_STR_P (kind )));
127
+ } else if (Z_TYPE_P (kind ) == IS_ARRAY ) {
128
+ zval * id_zval = NULL , * text_zval = NULL , * entry ;
129
+ ZEND_HASH_FOREACH_VAL (Z_ARRVAL_P (kind ), entry ) {
130
+ ZVAL_DEREF (entry );
131
+ if (Z_TYPE_P (entry ) == IS_LONG ) {
132
+ if (!id_zval ) {
133
+ id_zval = php_token_get_id (ZEND_THIS );
134
+ if (!id_zval ) {
135
+ RETURN_THROWS ();
136
+ }
137
+ }
138
+ if (Z_LVAL_P (id_zval ) == Z_LVAL_P (entry )) {
139
+ RETURN_TRUE ;
140
+ }
141
+ } else if (Z_TYPE_P (entry ) == IS_STRING ) {
142
+ if (!text_zval ) {
143
+ text_zval = php_token_get_text (ZEND_THIS );
144
+ if (!text_zval ) {
145
+ RETURN_THROWS ();
146
+ }
147
+ }
148
+ if (zend_string_equals (Z_STR_P (text_zval ), Z_STR_P (entry ))) {
149
+ RETURN_TRUE ;
150
+ }
151
+ } else {
152
+ zend_type_error ("Kind array must have elements of type int or string" );
153
+ RETURN_THROWS ();
154
+ }
155
+ } ZEND_HASH_FOREACH_END ();
156
+ RETURN_FALSE ;
157
+ } else {
158
+ zend_type_error ("Kind must be of type int, string or array" );
159
+ RETURN_THROWS ();
160
+ }
161
+ }
162
+
163
+ PHP_METHOD (PhpToken , isIgnorable )
164
+ {
165
+ ZEND_PARSE_PARAMETERS_NONE ();
166
+
167
+ zval * id_zval = php_token_get_id (ZEND_THIS );
168
+ if (!id_zval ) {
169
+ RETURN_THROWS ();
170
+ }
171
+
172
+ zend_long id = Z_LVAL_P (id_zval );
173
+ RETURN_BOOL (id == T_WHITESPACE || id == T_COMMENT || id == T_DOC_COMMENT || id == T_OPEN_TAG );
174
+ }
175
+
176
+ PHP_METHOD (PhpToken , getTokenName )
177
+ {
178
+ ZEND_PARSE_PARAMETERS_NONE ();
179
+
180
+ zval * id_zval = php_token_get_id (ZEND_THIS );
181
+ if (!id_zval ) {
182
+ RETURN_THROWS ();
183
+ }
184
+
185
+ if (Z_LVAL_P (id_zval ) < 256 ) {
186
+ RETURN_INTERNED_STR (ZSTR_CHAR (Z_LVAL_P (id_zval )));
187
+ } else {
188
+ RETURN_STRING (get_token_type_name (Z_LVAL_P (id_zval )));
189
+ }
190
+ }
191
+
192
/* Method table of the PhpToken class: the public methods is(), isIgnorable()
 * and getTokenName(), terminated by PHP_FE_END. It is handed to
 * INIT_CLASS_ENTRY() in PHP_MINIT_FUNCTION(tokenizer). */
+ static const zend_function_entry php_token_methods [] = {
193
+ PHP_ME (PhpToken , is , arginfo_class_PhpToken_is , ZEND_ACC_PUBLIC )
194
+ PHP_ME (PhpToken , isIgnorable , arginfo_class_PhpToken_isIgnorable , ZEND_ACC_PUBLIC )
195
+ PHP_ME (PhpToken , getTokenName , arginfo_class_PhpToken_getTokenName , ZEND_ACC_PUBLIC )
196
+ PHP_FE_END
197
+ };
198
+
79
199
/* {{{ PHP_MINIT_FUNCTION
80
200
*/
81
201
PHP_MINIT_FUNCTION (tokenizer )
@@ -88,7 +208,7 @@ PHP_MINIT_FUNCTION(tokenizer)
88
208
tokenizer_register_constants (INIT_FUNC_ARGS_PASSTHRU );
89
209
tokenizer_token_get_all_register_constants (INIT_FUNC_ARGS_PASSTHRU );
90
210
91
- INIT_CLASS_ENTRY (ce , "PhpToken" , NULL );
211
+ INIT_CLASS_ENTRY (ce , "PhpToken" , php_token_methods );
92
212
php_token_ce = zend_register_internal_class (& ce );
93
213
94
214
name = zend_string_init ("id" , sizeof ("id" ) - 1 , 1 );
@@ -125,40 +245,31 @@ PHP_MINFO_FUNCTION(tokenizer)
125
245
}
126
246
/* }}} */
127
247
128
/* Builds the zend_string holding a token's text (`leng` bytes starting at
 * `text`). A one-byte text yields ZSTR_CHAR(text[0]); any other length
 * yields a new string from zend_string_init().
 *
 * The removed (`-`) lines are the previous variant, which additionally took
 * an `interned_strings` table and, when it was non-NULL, looked the text up
 * there and stored newly created strings in it. The added (`+`) signature
 * drops that parameter. */
- static zend_string * make_str (unsigned char * text , size_t leng , HashTable * interned_strings ) {
248
+ static inline zend_string * make_str (unsigned char * text , size_t leng ) {
129
249
if (leng == 1 ) {
130
250
return ZSTR_CHAR (text [0 ]);
131
- } else if (interned_strings ) {
132
- zend_string * interned_str = zend_hash_str_find_ptr (interned_strings , (char * ) text , leng );
133
- if (interned_str ) {
134
- return zend_string_copy (interned_str );
135
- }
136
- interned_str = zend_string_init ((char * ) text , leng , 0 );
137
- zend_hash_add_new_ptr (interned_strings , interned_str , interned_str );
138
- return interned_str ;
139
251
} else {
140
252
return zend_string_init ((char * ) text , leng , 0 );
141
253
}
142
254
}
143
255
144
/* Appends one token to the array in `return_value`.
 *
 * The representation depends on the arguments:
 *  - as_object: a new php_token_ce object whose property slots 0..3 are set
 *    to token_type, the text string, lineno and the offset
 *    `text - LANG_SCNG(yy_start)`;
 *  - token_type >= 256: an array of [token_type, text string, lineno];
 *  - otherwise: the bare text string.
 *
 * The removed (`-`) lines are the previous variant, which forwarded an extra
 * `interned_strings` table to make_str(); the added (`+`) lines drop it. */
- static void add_token (
145
- zval * return_value , int token_type , unsigned char * text , size_t leng , int lineno ,
146
- zend_bool as_object , HashTable * interned_strings ) {
256
+ static void add_token (zval * return_value , int token_type ,
257
+ unsigned char * text , size_t leng , int lineno , zend_bool as_object ) {
147
258
zval token ;
148
259
if (as_object ) {
149
260
zend_object * obj = zend_objects_new (php_token_ce );
150
261
ZVAL_OBJ (& token , obj );
151
262
ZVAL_LONG (OBJ_PROP_NUM (obj , 0 ), token_type );
152
- ZVAL_STR (OBJ_PROP_NUM (obj , 1 ), make_str (text , leng , interned_strings ));
263
+ ZVAL_STR (OBJ_PROP_NUM (obj , 1 ), make_str (text , leng ));
153
264
ZVAL_LONG (OBJ_PROP_NUM (obj , 2 ), lineno );
154
265
ZVAL_LONG (OBJ_PROP_NUM (obj , 3 ), text - LANG_SCNG (yy_start ));
155
266
} else if (token_type >= 256 ) {
156
267
array_init (& token );
157
268
add_next_index_long (& token , token_type );
158
- add_next_index_str (& token , make_str (text , leng , interned_strings ));
269
+ add_next_index_str (& token , make_str (text , leng ));
159
270
add_next_index_long (& token , lineno );
160
271
} else {
161
- ZVAL_STR (& token , make_str (text , leng , interned_strings ));
272
+ ZVAL_STR (& token , make_str (text , leng ));
162
273
}
163
274
zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & token );
164
275
}
@@ -171,7 +282,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
171
282
int token_type ;
172
283
int token_line = 1 ;
173
284
int need_tokens = -1 ; /* for __halt_compiler lexing. -1 = disabled */
174
- HashTable interned_strings ;
175
285
176
286
ZVAL_STR_COPY (& source_zval , source );
177
287
zend_save_lexical_state (& original_lex_state );
@@ -182,12 +292,10 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
182
292
}
183
293
184
294
LANG_SCNG (yy_state ) = yycINITIAL ;
185
- zend_hash_init (& interned_strings , 0 , NULL , NULL , 0 );
186
295
array_init (return_value );
187
296
188
297
while ((token_type = lex_scan (& token , NULL ))) {
189
- add_token (return_value , token_type , zendtext , zendleng , token_line , as_object ,
190
- & interned_strings );
298
+ add_token (return_value , token_type , zendtext , zendleng , token_line , as_object );
191
299
192
300
if (Z_TYPE (token ) != IS_UNDEF ) {
193
301
zval_ptr_dtor_nogc (& token );
@@ -203,8 +311,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
203
311
/* fetch the rest into a T_INLINE_HTML */
204
312
if (zendcursor != zendlimit ) {
205
313
add_token (return_value , T_INLINE_HTML ,
206
- zendcursor , zendlimit - zendcursor , token_line , as_object ,
207
- & interned_strings );
314
+ zendcursor , zendlimit - zendcursor , token_line , as_object );
208
315
}
209
316
break ;
210
317
}
@@ -222,7 +329,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
222
329
223
330
zval_ptr_dtor_str (& source_zval );
224
331
zend_restore_lexical_state (& original_lex_state );
225
- zend_hash_destroy (& interned_strings );
226
332
227
333
return 1 ;
228
334
}
@@ -248,7 +354,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
248
354
token = T_OPEN_TAG_WITH_ECHO ;
249
355
}
250
356
add_token (ctx -> tokens , token ,
251
- LANG_SCNG (yy_text ), LANG_SCNG (yy_leng ), line , ctx -> as_object , NULL );
357
+ LANG_SCNG (yy_text ), LANG_SCNG (yy_leng ), line , ctx -> as_object );
252
358
break ;
253
359
case ON_FEEDBACK :
254
360
tokens_ht = Z_ARRVAL_P (ctx -> tokens );
@@ -263,8 +369,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
263
369
case ON_STOP :
264
370
if (LANG_SCNG (yy_cursor ) != LANG_SCNG (yy_limit )) {
265
371
add_token (ctx -> tokens , T_INLINE_HTML , LANG_SCNG (yy_cursor ),
266
- LANG_SCNG (yy_limit ) - LANG_SCNG (yy_cursor ), CG (zend_lineno ),
267
- ctx -> as_object , NULL );
372
+ LANG_SCNG (yy_limit ) - LANG_SCNG (yy_cursor ), CG (zend_lineno ), ctx -> as_object );
268
373
}
269
374
break ;
270
375
}
0 commit comments