Skip to content

Commit 0870da3

Browse files
authored
Use strcspn() to optimize dom_html5_escape_string() (#12948)
* Use strcspn() to optimize dom_html5_escape_string(): this routine, implemented by libc, uses a faster algorithm than the old naive byte-by-byte approach used here. It is also often optimized using SIMD.
* Calculate the mask outside of the loop.
1 parent 82baeeb commit 0870da3

File tree

1 file changed

+20
-16
lines changed

1 file changed

+20
-16
lines changed

ext/dom/html5_serializer.c

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,17 @@ static zend_result dom_html5_escape_string(dom_html5_serialize_context *ctx, con
7070
{
7171
const char *last_output = content;
7272

73-
while (*content != '\0') {
73+
/* Note: uses UTF-8 internally, so <C2 A0> indicates a non-breaking space */
74+
const char *mask = attribute_mode ? "&\xC2\"" : "&\xC2<>";
75+
76+
while (true) {
77+
size_t chunk_length = strcspn(content, mask);
78+
79+
content += chunk_length;
80+
if (*content == '\0') {
81+
break;
82+
}
83+
7484
switch (*content) {
7585
/* Step 1 */
7686
case '&': {
@@ -93,29 +103,23 @@ static zend_result dom_html5_escape_string(dom_html5_serialize_context *ctx, con
93103

94104
/* Step 3 */
95105
case '"': {
96-
if (attribute_mode) {
97-
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
98-
TRY(ctx->write_string_len(ctx->application_data, "&quot;", strlen("&quot;")));
99-
last_output = content + 1;
100-
}
106+
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
107+
TRY(ctx->write_string_len(ctx->application_data, "&quot;", strlen("&quot;")));
108+
last_output = content + 1;
101109
break;
102110
}
103111

104112
/* Step 4 */
105113
case '<': {
106-
if (!attribute_mode) {
107-
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
108-
TRY(ctx->write_string_len(ctx->application_data, "&lt;", strlen("&lt;")));
109-
last_output = content + 1;
110-
}
114+
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
115+
TRY(ctx->write_string_len(ctx->application_data, "&lt;", strlen("&lt;")));
116+
last_output = content + 1;
111117
break;
112118
}
113119
case '>': {
114-
if (!attribute_mode) {
115-
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
116-
TRY(ctx->write_string_len(ctx->application_data, "&gt;", strlen("&gt;")));
117-
last_output = content + 1;
118-
}
120+
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
121+
TRY(ctx->write_string_len(ctx->application_data, "&gt;", strlen("&gt;")));
122+
last_output = content + 1;
119123
break;
120124
}
121125
}

0 commit comments

Comments
 (0)