@@ -125,31 +125,40 @@ PHP_MINFO_FUNCTION(tokenizer)
125
125
}
126
126
/* }}} */
127
127
128
- static inline zend_string * make_str (unsigned char * text , size_t leng ) {
128
+ static zend_string * make_str (unsigned char * text , size_t leng , HashTable * interned_strings ) {
129
129
if (leng == 1 ) {
130
130
return ZSTR_CHAR (text [0 ]);
131
+ } else if (interned_strings ) {
132
+ zend_string * interned_str = zend_hash_str_find_ptr (interned_strings , (char * ) text , leng );
133
+ if (interned_str ) {
134
+ return zend_string_copy (interned_str );
135
+ }
136
+ interned_str = zend_string_init ((char * ) text , leng , 0 );
137
+ zend_hash_add_new_ptr (interned_strings , interned_str , interned_str );
138
+ return interned_str ;
131
139
} else {
132
140
return zend_string_init ((char * ) text , leng , 0 );
133
141
}
134
142
}
135
143
136
- static void add_token (zval * return_value , int token_type ,
137
- unsigned char * text , size_t leng , int lineno , zend_bool as_object ) {
144
+ static void add_token (
145
+ zval * return_value , int token_type , unsigned char * text , size_t leng , int lineno ,
146
+ zend_bool as_object , HashTable * interned_strings ) {
138
147
zval token ;
139
148
if (as_object ) {
140
149
zend_object * obj = zend_objects_new (php_token_ce );
141
150
ZVAL_OBJ (& token , obj );
142
151
ZVAL_LONG (OBJ_PROP_NUM (obj , 0 ), token_type );
143
- ZVAL_STR (OBJ_PROP_NUM (obj , 1 ), make_str (text , leng ));
152
+ ZVAL_STR (OBJ_PROP_NUM (obj , 1 ), make_str (text , leng , interned_strings ));
144
153
ZVAL_LONG (OBJ_PROP_NUM (obj , 2 ), lineno );
145
154
ZVAL_LONG (OBJ_PROP_NUM (obj , 3 ), text - LANG_SCNG (yy_start ));
146
155
} else if (token_type >= 256 ) {
147
156
array_init (& token );
148
157
add_next_index_long (& token , token_type );
149
- add_next_index_str (& token , make_str (text , leng ));
158
+ add_next_index_str (& token , make_str (text , leng , interned_strings ));
150
159
add_next_index_long (& token , lineno );
151
160
} else {
152
- ZVAL_STR (& token , make_str (text , leng ));
161
+ ZVAL_STR (& token , make_str (text , leng , interned_strings ));
153
162
}
154
163
zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & token );
155
164
}
@@ -162,6 +171,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
162
171
int token_type ;
163
172
int token_line = 1 ;
164
173
int need_tokens = -1 ; /* for __halt_compiler lexing. -1 = disabled */
174
+ HashTable interned_strings ;
165
175
166
176
ZVAL_STR_COPY (& source_zval , source );
167
177
zend_save_lexical_state (& original_lex_state );
@@ -172,10 +182,12 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
172
182
}
173
183
174
184
LANG_SCNG (yy_state ) = yycINITIAL ;
185
+ zend_hash_init (& interned_strings , 0 , NULL , NULL , 0 );
175
186
array_init (return_value );
176
187
177
188
while ((token_type = lex_scan (& token , NULL ))) {
178
- add_token (return_value , token_type , zendtext , zendleng , token_line , as_object );
189
+ add_token (return_value , token_type , zendtext , zendleng , token_line , as_object ,
190
+ & interned_strings );
179
191
180
192
if (Z_TYPE (token ) != IS_UNDEF ) {
181
193
zval_ptr_dtor_nogc (& token );
@@ -191,7 +203,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
191
203
/* fetch the rest into a T_INLINE_HTML */
192
204
if (zendcursor != zendlimit ) {
193
205
add_token (return_value , T_INLINE_HTML ,
194
- zendcursor , zendlimit - zendcursor , token_line , as_object );
206
+ zendcursor , zendlimit - zendcursor , token_line , as_object ,
207
+ & interned_strings );
195
208
}
196
209
break ;
197
210
}
@@ -209,6 +222,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
209
222
210
223
zval_ptr_dtor_str (& source_zval );
211
224
zend_restore_lexical_state (& original_lex_state );
225
+ zend_hash_destroy (& interned_strings );
212
226
213
227
return 1 ;
214
228
}
@@ -234,7 +248,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
234
248
token = T_OPEN_TAG_WITH_ECHO ;
235
249
}
236
250
add_token (ctx -> tokens , token ,
237
- LANG_SCNG (yy_text ), LANG_SCNG (yy_leng ), line , ctx -> as_object );
251
+ LANG_SCNG (yy_text ), LANG_SCNG (yy_leng ), line , ctx -> as_object , NULL );
238
252
break ;
239
253
case ON_FEEDBACK :
240
254
tokens_ht = Z_ARRVAL_P (ctx -> tokens );
@@ -249,7 +263,8 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
249
263
case ON_STOP :
250
264
if (LANG_SCNG (yy_cursor ) != LANG_SCNG (yy_limit )) {
251
265
add_token (ctx -> tokens , T_INLINE_HTML , LANG_SCNG (yy_cursor ),
252
- LANG_SCNG (yy_limit ) - LANG_SCNG (yy_cursor ), CG (zend_lineno ), ctx -> as_object );
266
+ LANG_SCNG (yy_limit ) - LANG_SCNG (yy_cursor ), CG (zend_lineno ),
267
+ ctx -> as_object , NULL );
253
268
}
254
269
break ;
255
270
}
0 commit comments