Skip to content

Commit 8533fcc

Browse files
committed
Assert minimum size of wchar buffer in text conversion filters
Every text conversion filter that requires its output wchar buffer to have some minimum size should assert that requirement; this helps us catch bugs, and also helps future readers understand what the function expects of its arguments. For UTF-7 and UTF7-IMAP, these assertions were already present, but I have added comments explaining why the minimum size is what it is.
1 parent 871e61f commit 8533fcc

File tree

6 files changed

+20
-0
lines changed

6 files changed

+20
-0
lines changed

ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,8 @@ static int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter)
640640

641641
static size_t mb_cp5022x_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
642642
{
643+
ZEND_ASSERT(bufsize >= 3);
644+
643645
unsigned char *p = *in, *e = p + *in_len;
644646
uint32_t *out = buf, *limit = buf + bufsize;
645647

ext/mbstring/libmbfl/filters/mbfilter_jis.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,8 @@ mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter)
463463

464464
static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
465465
{
466+
ZEND_ASSERT(bufsize >= 3);
467+
466468
unsigned char *p = *in, *e = p + *in_len;
467469
uint32_t *out = buf, *limit = buf + bufsize;
468470

ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,9 @@ mbfl_filt_conv_wchar_sjis_mac_flush(mbfl_convert_filter *filter)
667667

668668
static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
669669
{
670+
/* A single SJIS-Mac kuten code can convert to up to 5 Unicode codepoints, oh my! */
671+
ZEND_ASSERT(bufsize >= 5);
672+
670673
unsigned char *p = *in, *e = p + *in_len;
671674
uint32_t *out = buf, *limit = buf + bufsize;
672675

ext/mbstring/libmbfl/filters/mbfilter_utf7.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,13 @@ static size_t mb_utf7_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf
480480
{
481481
ZEND_ASSERT(bufsize >= 5); /* This function will infinite-loop if called with a tiny output buffer */
482482

483+
/* Why does this require a minimum output buffer size of 5?
484+
* There is one case where one iteration of the main 'while' loop below will emit 5 wchars:
485+
* that is if the first half of a surrogate pair is followed by an otherwise valid codepoint which
486+
* is not the 2nd half of a surrogate pair, then another valid codepoint, then the Base64-encoded
487+
* section ends with a byte which is not a valid Base64 character, AND which also is not in a
488+
* position where we would expect the Base64-encoded section to end */
489+
483490
unsigned char *p = *in, *e = p + *in_len;
484491
uint32_t *out = buf, *limit = buf + bufsize;
485492

ext/mbstring/libmbfl/filters/mbfilter_utf7imap.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,10 @@ static size_t mb_utf7imap_to_wchar(unsigned char **in, size_t *in_len, uint32_t
505505
{
506506
ZEND_ASSERT(bufsize >= 5); /* This function will infinite-loop if called with a tiny output buffer */
507507

508+
/* Why does this require a minimum output buffer size of 5?
509+
* See comment in mb_utf7_to_wchar; the worst case for this function is similar,
510+
* though not exactly the same. */
511+
508512
unsigned char *p = *in, *e = p + *in_len;
509513
/* Always leave one empty space in output buffer in case the string ends while
510514
* in Base64 mode and we need to emit an error marker */

ext/mbstring/libmbfl/filters/mbfilter_uuencode.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter)
167167

168168
static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
169169
{
170+
ZEND_ASSERT(bufsize >= 3);
171+
170172
unsigned char *p = *in, *e = p + *in_len;
171173
uint32_t *out = buf, *limit = buf + bufsize;
172174

0 commit comments

Comments
 (0)