@@ -163,7 +163,7 @@ static inline unsigned int get_next_char(
163
163
else
164
164
MB_FAILURE (pos , 4 );
165
165
}
166
-
166
+
167
167
this_char = ((c & 0x07 ) << 18 ) | ((str [pos + 1 ] & 0x3f ) << 12 ) | ((str [pos + 2 ] & 0x3f ) << 6 ) | (str [pos + 3 ] & 0x3f );
168
168
if (this_char < 0x10000 || this_char > 0x10FFFF ) { /* non-shortest form or outside range */
169
169
MB_FAILURE (pos , 4 );
@@ -437,7 +437,7 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
437
437
438
438
if (charset_hint ) {
439
439
int found = 0 ;
440
-
440
+
441
441
/* now walk the charset map and look for the codeset */
442
442
for (i = 0 ; charset_map [i ].codeset ; i ++ ) {
443
443
if (len == strlen (charset_map [i ].codeset ) && strncasecmp (charset_hint , charset_map [i ].codeset , len ) == 0 ) {
@@ -545,7 +545,7 @@ static inline unsigned char unimap_bsearch(const uni_to_enc *table, unsigned cod
545
545
return 0 ;
546
546
547
547
code_key = (unsigned short ) code_key_a ;
548
-
548
+
549
549
while (l <= h ) {
550
550
m = l + (h - l ) / 2 ;
551
551
if (code_key < m -> un_code_point )
@@ -571,7 +571,7 @@ static inline int map_from_unicode(unsigned code, enum entity_charset charset, u
571
571
/* identity mapping of code points to unicode */
572
572
if (code > 0xFF ) {
573
573
return FAILURE ;
574
- }
574
+ }
575
575
* res = code ;
576
576
break ;
577
577
@@ -590,7 +590,7 @@ static inline int map_from_unicode(unsigned code, enum entity_charset charset, u
590
590
return FAILURE ;
591
591
}
592
592
break ;
593
-
593
+
594
594
case cs_8859_15 :
595
595
if (code < 0xA4 || (code > 0xBE && code <= 0xFF )) {
596
596
* res = code ;
@@ -634,7 +634,7 @@ static inline int map_from_unicode(unsigned code, enum entity_charset charset, u
634
634
case cs_cp866 :
635
635
table = unimap_cp866 ;
636
636
table_size = sizeof (unimap_cp866 ) / sizeof (* unimap_cp866 );
637
-
637
+
638
638
table_over_7F :
639
639
if (code <= 0x7F ) {
640
640
* res = code ;
@@ -710,7 +710,7 @@ static inline int unicode_cp_is_allowed(unsigned uni_cp, int document_type)
710
710
* Not sure this is the relevant part for HTML 5, though. I opted to
711
711
* disallow the characters that would result in a parse error when
712
712
* preprocessing of the input stream. See also section 8.1.3.
713
- *
713
+ *
714
714
* It's unclear if XHTML 1.0 allows C1 characters. I'll opt to apply to
715
715
* XHTML 1.0 the same rules as for XML 1.0.
716
716
* See <http://cmsmcq.com/2007/C1.xml>.
@@ -774,7 +774,7 @@ static inline int numeric_entity_is_allowed(unsigned uni_cp, int document_type)
774
774
/* {{{ process_numeric_entity
775
775
* Auxiliary function to traverse_for_entities.
776
776
* On input, *buf should point to the first character after # and on output, it's the last
777
- * byte read, no matter if there was success or insuccess.
777
+ * byte read, no matter if there was success or insuccess.
778
778
*/
779
779
static inline int process_numeric_entity (const char * * buf , unsigned * code_point )
780
780
{
@@ -784,7 +784,7 @@ static inline int process_numeric_entity(const char **buf, unsigned *code_point)
784
784
785
785
if (hexadecimal && (* * buf != '\0' ))
786
786
(* buf )++ ;
787
-
787
+
788
788
/* strtol allows whitespace and other stuff in the beginning
789
789
* we're not interested */
790
790
if ((hexadecimal && !isxdigit (* * buf )) ||
@@ -969,7 +969,7 @@ static void traverse_for_entities(
969
969
goto invalid_code ;
970
970
971
971
/* are we allowed to decode this entity in this document type?
972
- * HTML 5 is the only that has a character that cannot be used in
972
+ * HTML 5 is the only that has a character that cannot be used in
973
973
* a numeric entity but is allowed literally (U+000D). The
974
974
* unoptimized version would be ... || !numeric_entity_is_allowed(code) */
975
975
if (!unicode_cp_is_allowed (code , doctype ) ||
@@ -996,9 +996,9 @@ static void traverse_for_entities(
996
996
}
997
997
}
998
998
}
999
-
999
+
1000
1000
assert (* next == ';' );
1001
-
1001
+
1002
1002
if (((code == '\'' && !(flags & ENT_HTML_QUOTE_SINGLE )) ||
1003
1003
(code == '"' && !(flags & ENT_HTML_QUOTE_DOUBLE )))
1004
1004
/* && code2 == '\0' always true for current maps */ )
@@ -1026,7 +1026,7 @@ static void traverse_for_entities(
1026
1026
* (q ++ ) = * p ;
1027
1027
}
1028
1028
}
1029
-
1029
+
1030
1030
* q = '\0' ;
1031
1031
* retlen = (size_t )(q - ret );
1032
1032
}
@@ -1066,7 +1066,7 @@ static entity_table_opt determine_entity_table(int all, int doctype)
1066
1066
entity_table_opt retval = {NULL };
1067
1067
1068
1068
assert (!(doctype == ENT_HTML_DOC_XML1 && all ));
1069
-
1069
+
1070
1070
if (all ) {
1071
1071
retval .ms_table = (doctype == ENT_HTML_DOC_HTML5 ) ?
1072
1072
entity_ms_table_html5 : entity_ms_table_html4 ;
@@ -1111,13 +1111,13 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_
1111
1111
if (retlen == 0 ) {
1112
1112
goto empty_source ;
1113
1113
}
1114
-
1114
+
1115
1115
inverse_map = unescape_inverse_map (all , flags );
1116
-
1116
+
1117
1117
/* replace numeric entities */
1118
1118
traverse_for_entities (old , oldlen , ret , & retlen , all , flags , inverse_map , charset );
1119
1119
1120
- empty_source :
1120
+ empty_source :
1121
1121
* newlen = retlen ;
1122
1122
return ret ;
1123
1123
}
@@ -1141,7 +1141,7 @@ static inline void find_entity_for_char(
1141
1141
{
1142
1142
unsigned stage1_idx = ENT_STAGE1_INDEX (k );
1143
1143
const entity_stage3_row * c ;
1144
-
1144
+
1145
1145
if (stage1_idx > 0x1D ) {
1146
1146
* entity = NULL ;
1147
1147
* entity_len = 0 ;
@@ -1162,7 +1162,7 @@ static inline void find_entity_for_char(
1162
1162
if (!(* cursor < oldlen ))
1163
1163
goto no_suitable_2nd ;
1164
1164
1165
- next_char = get_next_char (charset , old , oldlen , cursor , & status );
1165
+ next_char = get_next_char (charset , old , oldlen , cursor , & status );
1166
1166
1167
1167
if (status == FAILURE )
1168
1168
goto no_suitable_2nd ;
@@ -1187,7 +1187,7 @@ static inline void find_entity_for_char(
1187
1187
* entity = (const unsigned char * )
1188
1188
c -> data .multicodepoint_table [0 ].leading_entry .default_entity ;
1189
1189
* entity_len = c -> data .multicodepoint_table [0 ].leading_entry .default_entity_len ;
1190
- }
1190
+ }
1191
1191
}
1192
1192
/* }}} */
1193
1193
@@ -1255,7 +1255,7 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size
1255
1255
1256
1256
/* initial estimate */
1257
1257
if (oldlen < 64 ) {
1258
- maxlen = 128 ;
1258
+ maxlen = 128 ;
1259
1259
} else {
1260
1260
maxlen = 2 * oldlen ;
1261
1261
if (maxlen < oldlen ) {
@@ -1444,6 +1444,10 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
1444
1444
}
1445
1445
1446
1446
replaced = php_escape_html_entities_ex (str , str_len , & new_len , all , (int ) flags , hint_charset , double_encode TSRMLS_CC );
1447
+ if (new_len > INT_MAX ) {
1448
+ efree (replaced );
1449
+ RETURN_FALSE ;
1450
+ }
1447
1451
RETVAL_STRINGL (replaced , (int )new_len , 0 );
1448
1452
}
1449
1453
/* }}} */
@@ -1577,7 +1581,7 @@ static inline void write_s3row_data(
1577
1581
} else {
1578
1582
spe_cp = uni_cp ;
1579
1583
}
1580
-
1584
+
1581
1585
written_k2 = write_octet_sequence (& key [written_k1 ], charset , spe_cp );
1582
1586
memcpy (& entity [1 ], mcpr [i ].normal_entry .entity , l );
1583
1587
entity [l + 1 ] = ';' ;
@@ -1615,7 +1619,7 @@ PHP_FUNCTION(get_html_translation_table)
1615
1619
LIMIT_ALL (all , doctype , charset );
1616
1620
1617
1621
array_init (return_value );
1618
-
1622
+
1619
1623
entity_table = determine_entity_table (all , doctype );
1620
1624
if (all && !CHARSET_UNICODE_COMPAT (charset )) {
1621
1625
to_uni_table = enc_to_uni_index [charset ];
0 commit comments