Skip to content

Commit fac1a2d

Browse files
committed
Locally intern strings in token_get_all()
1 parent 1a57c20, commit fac1a2d

File tree

1 file changed

+25
-10
lines changed

1 file changed

+25
-10
lines changed

ext/tokenizer/tokenizer.c

Lines changed: 25 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -125,31 +125,40 @@ PHP_MINFO_FUNCTION(tokenizer)
125125
}
126126
/* }}} */
127127

128-
static inline zend_string *make_str(unsigned char *text, size_t leng) {
128+
static zend_string *make_str(unsigned char *text, size_t leng, HashTable *interned_strings) {
129129
if (leng == 1) {
130130
return ZSTR_CHAR(text[0]);
131+
} else if (interned_strings) {
132+
zend_string *interned_str = zend_hash_str_find_ptr(interned_strings, (char *) text, leng);
133+
if (interned_str) {
134+
return zend_string_copy(interned_str);
135+
}
136+
interned_str = zend_string_init((char *) text, leng, 0);
137+
zend_hash_add_new_ptr(interned_strings, interned_str, interned_str);
138+
return interned_str;
131139
} else {
132140
return zend_string_init((char *) text, leng, 0);
133141
}
134142
}
135143

136-
static void add_token(zval *return_value, int token_type,
137-
unsigned char *text, size_t leng, int lineno, zend_bool as_object) {
144+
static void add_token(
145+
zval *return_value, int token_type, unsigned char *text, size_t leng, int lineno,
146+
zend_bool as_object, HashTable *interned_strings) {
138147
zval token;
139148
if (as_object) {
140149
zend_object *obj = zend_objects_new(php_token_ce);
141150
ZVAL_OBJ(&token, obj);
142151
ZVAL_LONG(OBJ_PROP_NUM(obj, 0), token_type);
143-
ZVAL_STR(OBJ_PROP_NUM(obj, 1), make_str(text, leng));
152+
ZVAL_STR(OBJ_PROP_NUM(obj, 1), make_str(text, leng, interned_strings));
144153
ZVAL_LONG(OBJ_PROP_NUM(obj, 2), lineno);
145154
ZVAL_LONG(OBJ_PROP_NUM(obj, 3), text - LANG_SCNG(yy_start));
146155
} else if (token_type >= 256) {
147156
array_init(&token);
148157
add_next_index_long(&token, token_type);
149-
add_next_index_str(&token, make_str(text, leng));
158+
add_next_index_str(&token, make_str(text, leng, interned_strings));
150159
add_next_index_long(&token, lineno);
151160
} else {
152-
ZVAL_STR(&token, make_str(text, leng));
161+
ZVAL_STR(&token, make_str(text, leng, interned_strings));
153162
}
154163
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &token);
155164
}
@@ -162,6 +171,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
162171
int token_type;
163172
int token_line = 1;
164173
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
174+
HashTable interned_strings;
165175

166176
ZVAL_STR_COPY(&source_zval, source);
167177
zend_save_lexical_state(&original_lex_state);
@@ -172,10 +182,12 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
172182
}
173183

174184
LANG_SCNG(yy_state) = yycINITIAL;
185+
zend_hash_init(&interned_strings, 0, NULL, NULL, 0);
175186
array_init(return_value);
176187

177188
while ((token_type = lex_scan(&token, NULL))) {
178-
add_token(return_value, token_type, zendtext, zendleng, token_line, as_object);
189+
add_token(return_value, token_type, zendtext, zendleng, token_line, as_object,
190+
&interned_strings);
179191

180192
if (Z_TYPE(token) != IS_UNDEF) {
181193
zval_ptr_dtor_nogc(&token);
@@ -191,7 +203,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
191203
/* fetch the rest into a T_INLINE_HTML */
192204
if (zendcursor != zendlimit) {
193205
add_token(return_value, T_INLINE_HTML,
194-
zendcursor, zendlimit - zendcursor, token_line, as_object);
206+
zendcursor, zendlimit - zendcursor, token_line, as_object,
207+
&interned_strings);
195208
}
196209
break;
197210
}
@@ -209,6 +222,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
209222

210223
zval_ptr_dtor_str(&source_zval);
211224
zend_restore_lexical_state(&original_lex_state);
225+
zend_hash_destroy(&interned_strings);
212226

213227
return 1;
214228
}
@@ -234,7 +248,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
234248
token = T_OPEN_TAG_WITH_ECHO;
235249
}
236250
add_token(ctx->tokens, token,
237-
LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->as_object);
251+
LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->as_object, NULL);
238252
break;
239253
case ON_FEEDBACK:
240254
tokens_ht = Z_ARRVAL_P(ctx->tokens);
@@ -249,7 +263,8 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
249263
case ON_STOP:
250264
if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) {
251265
add_token(ctx->tokens, T_INLINE_HTML, LANG_SCNG(yy_cursor),
252-
LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor), CG(zend_lineno), ctx->as_object);
266+
LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor), CG(zend_lineno),
267+
ctx->as_object, NULL);
253268
}
254269
break;
255270
}

0 commit comments

Comments
 (0)