Skip to content

Commit 0da8b8b

Browse files
committed
Fix bug #72135 - don't create strings with lengths outside int range
1 parent 41fc3c7 commit 0da8b8b

File tree

1 file changed: 27 additions and 23 deletions.

ext/standard/html.c

Lines changed: 27 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ static inline unsigned int get_next_char(
163163
else
164164
MB_FAILURE(pos, 4);
165165
}
166-
166+
167167
this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
168168
if (this_char < 0x10000 || this_char > 0x10FFFF) { /* non-shortest form or outside range */
169169
MB_FAILURE(pos, 4);
@@ -437,7 +437,7 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
437437

438438
if (charset_hint) {
439439
int found = 0;
440-
440+
441441
/* now walk the charset map and look for the codeset */
442442
for (i = 0; charset_map[i].codeset; i++) {
443443
if (len == strlen(charset_map[i].codeset) && strncasecmp(charset_hint, charset_map[i].codeset, len) == 0) {
@@ -545,7 +545,7 @@ static inline unsigned char unimap_bsearch(const uni_to_enc *table, unsigned cod
545545
return 0;
546546

547547
code_key = (unsigned short) code_key_a;
548-
548+
549549
while (l <= h) {
550550
m = l + (h - l) / 2;
551551
if (code_key < m->un_code_point)
@@ -571,7 +571,7 @@ static inline int map_from_unicode(unsigned code, enum entity_charset charset, u
571571
/* identity mapping of code points to unicode */
572572
if (code > 0xFF) {
573573
return FAILURE;
574-
}
574+
}
575575
*res = code;
576576
break;
577577

@@ -590,7 +590,7 @@ static inline int map_from_unicode(unsigned code, enum entity_charset charset, u
590590
return FAILURE;
591591
}
592592
break;
593-
593+
594594
case cs_8859_15:
595595
if (code < 0xA4 || (code > 0xBE && code <= 0xFF)) {
596596
*res = code;
@@ -634,7 +634,7 @@ static inline int map_from_unicode(unsigned code, enum entity_charset charset, u
634634
case cs_cp866:
635635
table = unimap_cp866;
636636
table_size = sizeof(unimap_cp866) / sizeof(*unimap_cp866);
637-
637+
638638
table_over_7F:
639639
if (code <= 0x7F) {
640640
*res = code;
@@ -710,7 +710,7 @@ static inline int unicode_cp_is_allowed(unsigned uni_cp, int document_type)
710710
* Not sure this is the relevant part for HTML 5, though. I opted to
711711
* disallow the characters that would result in a parse error when
712712
* preprocessing of the input stream. See also section 8.1.3.
713-
*
713+
*
714714
* It's unclear if XHTML 1.0 allows C1 characters. I'll opt to apply to
715715
* XHTML 1.0 the same rules as for XML 1.0.
716716
* See <http://cmsmcq.com/2007/C1.xml>.
@@ -774,7 +774,7 @@ static inline int numeric_entity_is_allowed(unsigned uni_cp, int document_type)
774774
/* {{{ process_numeric_entity
775775
* Auxiliary function to traverse_for_entities.
776776
* On input, *buf should point to the first character after # and on output, it's the last
777-
* byte read, no matter if there was success or insuccess.
777+
* byte read, no matter if there was success or insuccess.
778778
*/
779779
static inline int process_numeric_entity(const char **buf, unsigned *code_point)
780780
{
@@ -784,7 +784,7 @@ static inline int process_numeric_entity(const char **buf, unsigned *code_point)
784784

785785
if (hexadecimal && (**buf != '\0'))
786786
(*buf)++;
787-
787+
788788
/* strtol allows whitespace and other stuff in the beginning
789789
* we're not interested */
790790
if ((hexadecimal && !isxdigit(**buf)) ||
@@ -969,7 +969,7 @@ static void traverse_for_entities(
969969
goto invalid_code;
970970

971971
/* are we allowed to decode this entity in this document type?
972-
* HTML 5 is the only that has a character that cannot be used in
972+
* HTML 5 is the only that has a character that cannot be used in
973973
* a numeric entity but is allowed literally (U+000D). The
974974
* unoptimized version would be ... || !numeric_entity_is_allowed(code) */
975975
if (!unicode_cp_is_allowed(code, doctype) ||
@@ -996,9 +996,9 @@ static void traverse_for_entities(
996996
}
997997
}
998998
}
999-
999+
10001000
assert(*next == ';');
1001-
1001+
10021002
if (((code == '\'' && !(flags & ENT_HTML_QUOTE_SINGLE)) ||
10031003
(code == '"' && !(flags & ENT_HTML_QUOTE_DOUBLE)))
10041004
/* && code2 == '\0' always true for current maps */)
@@ -1026,7 +1026,7 @@ static void traverse_for_entities(
10261026
*(q++) = *p;
10271027
}
10281028
}
1029-
1029+
10301030
*q = '\0';
10311031
*retlen = (size_t)(q - ret);
10321032
}
@@ -1066,7 +1066,7 @@ static entity_table_opt determine_entity_table(int all, int doctype)
10661066
entity_table_opt retval = {NULL};
10671067

10681068
assert(!(doctype == ENT_HTML_DOC_XML1 && all));
1069-
1069+
10701070
if (all) {
10711071
retval.ms_table = (doctype == ENT_HTML_DOC_HTML5) ?
10721072
entity_ms_table_html5 : entity_ms_table_html4;
@@ -1111,13 +1111,13 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_
11111111
if (retlen == 0) {
11121112
goto empty_source;
11131113
}
1114-
1114+
11151115
inverse_map = unescape_inverse_map(all, flags);
1116-
1116+
11171117
/* replace numeric entities */
11181118
traverse_for_entities(old, oldlen, ret, &retlen, all, flags, inverse_map, charset);
11191119

1120-
empty_source:
1120+
empty_source:
11211121
*newlen = retlen;
11221122
return ret;
11231123
}
@@ -1141,7 +1141,7 @@ static inline void find_entity_for_char(
11411141
{
11421142
unsigned stage1_idx = ENT_STAGE1_INDEX(k);
11431143
const entity_stage3_row *c;
1144-
1144+
11451145
if (stage1_idx > 0x1D) {
11461146
*entity = NULL;
11471147
*entity_len = 0;
@@ -1162,7 +1162,7 @@ static inline void find_entity_for_char(
11621162
if (!(*cursor < oldlen))
11631163
goto no_suitable_2nd;
11641164

1165-
next_char = get_next_char(charset, old, oldlen, cursor, &status);
1165+
next_char = get_next_char(charset, old, oldlen, cursor, &status);
11661166

11671167
if (status == FAILURE)
11681168
goto no_suitable_2nd;
@@ -1187,7 +1187,7 @@ static inline void find_entity_for_char(
11871187
*entity = (const unsigned char *)
11881188
c->data.multicodepoint_table[0].leading_entry.default_entity;
11891189
*entity_len = c->data.multicodepoint_table[0].leading_entry.default_entity_len;
1190-
}
1190+
}
11911191
}
11921192
/* }}} */
11931193

@@ -1255,7 +1255,7 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size
12551255

12561256
/* initial estimate */
12571257
if (oldlen < 64) {
1258-
maxlen = 128;
1258+
maxlen = 128;
12591259
} else {
12601260
maxlen = 2 * oldlen;
12611261
if (maxlen < oldlen) {
@@ -1444,6 +1444,10 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
14441444
}
14451445

14461446
replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
1447+
if (new_len > INT_MAX) {
1448+
efree(replaced);
1449+
RETURN_FALSE;
1450+
}
14471451
RETVAL_STRINGL(replaced, (int)new_len, 0);
14481452
}
14491453
/* }}} */
@@ -1577,7 +1581,7 @@ static inline void write_s3row_data(
15771581
} else {
15781582
spe_cp = uni_cp;
15791583
}
1580-
1584+
15811585
written_k2 = write_octet_sequence(&key[written_k1], charset, spe_cp);
15821586
memcpy(&entity[1], mcpr[i].normal_entry.entity, l);
15831587
entity[l + 1] = ';';
@@ -1615,7 +1619,7 @@ PHP_FUNCTION(get_html_translation_table)
16151619
LIMIT_ALL(all, doctype, charset);
16161620

16171621
array_init(return_value);
1618-
1622+
16191623
entity_table = determine_entity_table(all, doctype);
16201624
if (all && !CHARSET_UNICODE_COMPAT(charset)) {
16211625
to_uni_table = enc_to_uni_index[charset];

0 commit comments

Comments
 (0)