Skip to content

Commit bb21617

Browse files
committed
Implement mb_str_pad()
1 parent a94fe87 commit bb21617

File tree

6 files changed

+268
-1
lines changed

6 files changed

+268
-1
lines changed

NEWS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ PHP NEWS
103103
QPrint-encoding the input string. This previously caused strings in
104104
certain text encodings, especially UTF-16 and UTF-32, to be
105105
corrupted by mb_encode_mimeheader. (Alex Dowad)
106+
. Added mb_str_pad(). (nielsdos)
106107

107108
- mysqli:
108109
. mysqli_fetch_object raises a ValueError instead of an Exception.

UPGRADING

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,10 @@ PHP 8.3 UPGRADE NOTES
172172
the given $depth and $options.
173173
RFC: https://wiki.php.net/rfc/json_validate
174174

175+
- Mbstring:
176+
. Added mb_str_pad(), which is the mbstring equivalent of str_pad().
177+
RFC: https://wiki.php.net/rfc/mb_str_pad
178+
175179
- Posix:
176180
. Added posix_sysconf call to get runtime information.
177181
. Added posix_pathconf call to get configuration value from a directory/file.

ext/mbstring/mbstring.c

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5521,6 +5521,133 @@ PHP_FUNCTION(mb_chr)
55215521
}
55225522
/* }}} */
55235523

5524+
PHP_FUNCTION(mb_str_pad)
5525+
{
5526+
zend_string *input, *encoding_str = NULL, *pad = NULL;
5527+
zend_long pad_to_length;
5528+
zend_long pad_type_val = PHP_STR_PAD_RIGHT;
5529+
5530+
ZEND_PARSE_PARAMETERS_START(2, 5)
5531+
Z_PARAM_STR(input)
5532+
Z_PARAM_LONG(pad_to_length)
5533+
Z_PARAM_OPTIONAL
5534+
Z_PARAM_STR(pad)
5535+
Z_PARAM_LONG(pad_type_val)
5536+
Z_PARAM_STR_OR_NULL(encoding_str)
5537+
ZEND_PARSE_PARAMETERS_END();
5538+
5539+
const mbfl_encoding *encoding = php_mb_get_encoding(encoding_str, 5);
5540+
if (!encoding) {
5541+
RETURN_THROWS();
5542+
}
5543+
5544+
size_t input_length = mb_get_strlen(input, encoding);
5545+
5546+
/* If resulting string turns out to be shorter than input string,
5547+
we simply copy the input and return. */
5548+
if (pad_to_length < 0 || (size_t)pad_to_length <= input_length) {
5549+
RETURN_STR_COPY(input);
5550+
}
5551+
5552+
if (ZSTR_LEN(pad) == 0) {
5553+
zend_argument_value_error(3, "must be a non-empty string");
5554+
RETURN_THROWS();
5555+
}
5556+
5557+
if (pad_type_val < PHP_STR_PAD_LEFT || pad_type_val > PHP_STR_PAD_BOTH) {
5558+
zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
5559+
RETURN_THROWS();
5560+
}
5561+
5562+
size_t pad_length = mb_get_strlen(pad, encoding);
5563+
5564+
size_t num_mb_pad_chars = pad_to_length - input_length;
5565+
5566+
/* We need to figure out the left/right padding lengths. */
5567+
size_t left_pad, right_pad;
5568+
switch (pad_type_val) {
5569+
case PHP_STR_PAD_RIGHT:
5570+
left_pad = 0;
5571+
right_pad = num_mb_pad_chars;
5572+
break;
5573+
5574+
case PHP_STR_PAD_LEFT:
5575+
left_pad = num_mb_pad_chars;
5576+
right_pad = 0;
5577+
break;
5578+
5579+
case PHP_STR_PAD_BOTH:
5580+
left_pad = num_mb_pad_chars / 2;
5581+
right_pad = num_mb_pad_chars - left_pad;
5582+
break;
5583+
}
5584+
5585+
/* Compute the number of bytes required for the padding */
5586+
size_t full_left_pad_chars = left_pad / pad_length;
5587+
size_t full_right_pad_chars = right_pad / pad_length;
5588+
size_t remaining_left_pad_chars = left_pad % pad_length;
5589+
size_t remaining_right_pad_chars = right_pad % pad_length;
5590+
5591+
if (UNEXPECTED(full_left_pad_chars > SIZE_MAX / ZSTR_LEN(pad) || full_right_pad_chars > SIZE_MAX / ZSTR_LEN(pad))) {
5592+
goto overflow_no_release;
5593+
}
5594+
5595+
size_t full_left_pad_bytes = full_left_pad_chars * ZSTR_LEN(pad);
5596+
size_t full_right_pad_bytes = full_right_pad_chars * ZSTR_LEN(pad);
5597+
5598+
/* No special fast-path handling necessary for zero-length pads because these functions will not
5599+
* allocate memory in case a zero-length pad is required. */
5600+
zend_string *remaining_left_pad_str = mb_get_substr(pad, 0, remaining_left_pad_chars, encoding);
5601+
zend_string *remaining_right_pad_str = mb_get_substr(pad, 0, remaining_right_pad_chars, encoding);
5602+
5603+
if (UNEXPECTED(full_left_pad_bytes > ZSTR_MAX_LEN - ZSTR_LEN(remaining_left_pad_str)
5604+
|| full_right_pad_bytes > ZSTR_MAX_LEN - ZSTR_LEN(remaining_right_pad_str))) {
5605+
goto overflow;
5606+
}
5607+
5608+
size_t left_pad_bytes = full_left_pad_bytes + ZSTR_LEN(remaining_left_pad_str);
5609+
size_t right_pad_bytes = full_right_pad_bytes + ZSTR_LEN(remaining_right_pad_str);
5610+
5611+
if (UNEXPECTED(left_pad_bytes > ZSTR_MAX_LEN - right_pad_bytes
5612+
|| ZSTR_LEN(input) > ZSTR_MAX_LEN - left_pad_bytes - right_pad_bytes)) {
5613+
goto overflow;
5614+
}
5615+
5616+
zend_string *result = zend_string_alloc(ZSTR_LEN(input) + left_pad_bytes + right_pad_bytes, false);
5617+
char *buffer = ZSTR_VAL(result);
5618+
5619+
/* First we pad the left. */
5620+
for (size_t i = 0; i < full_left_pad_chars; i++, buffer += ZSTR_LEN(pad)) {
5621+
memcpy(buffer, ZSTR_VAL(pad), ZSTR_LEN(pad));
5622+
}
5623+
memcpy(buffer, ZSTR_VAL(remaining_left_pad_str), ZSTR_LEN(remaining_left_pad_str));
5624+
buffer += ZSTR_LEN(remaining_left_pad_str);
5625+
5626+
/* Then we copy the input string. */
5627+
memcpy(buffer, ZSTR_VAL(input), ZSTR_LEN(input));
5628+
buffer += ZSTR_LEN(input);
5629+
5630+
/* Finally, we pad on the right. */
5631+
for (size_t i = 0; i < full_right_pad_chars; i++, buffer += ZSTR_LEN(pad)) {
5632+
memcpy(buffer, ZSTR_VAL(pad), ZSTR_LEN(pad));
5633+
}
5634+
memcpy(buffer, ZSTR_VAL(remaining_right_pad_str), ZSTR_LEN(remaining_right_pad_str));
5635+
5636+
ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
5637+
5638+
zend_string_release_ex(remaining_left_pad_str, false);
5639+
zend_string_release_ex(remaining_right_pad_str, false);
5640+
5641+
RETURN_NEW_STR(result);
5642+
5643+
overflow:
5644+
zend_string_release_ex(remaining_left_pad_str, false);
5645+
zend_string_release_ex(remaining_right_pad_str, false);
5646+
overflow_no_release:
5647+
zend_throw_error(NULL, "String size overflow");
5648+
RETURN_THROWS();
5649+
}
5650+
55245651
/* {{{ */
55255652
PHP_FUNCTION(mb_scrub)
55265653
{

ext/mbstring/mbstring.stub.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ function mb_ord(string $string, ?string $encoding = null): int|false {}
183183

184184
function mb_chr(int $codepoint, ?string $encoding = null): string|false {}
185185

186+
// Multibyte counterpart of str_pad(): pads $string to $length characters,
// counted in $encoding rather than in bytes, using repetitions of $pad_string.
// $pad_type selects the side: STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH.
// $string is returned unchanged when $length is negative or does not exceed
// its character count. An empty $pad_string, an unknown $pad_type or an
// unknown $encoding raises a ValueError.
function mb_str_pad(string $string, int $length, string $pad_string = " ", int $pad_type = STR_PAD_RIGHT, ?string $encoding = null): string {}
187+
186188
#ifdef HAVE_MBREGEX
187189
/** @refcount 1 */
188190
function mb_regex_encoding(?string $encoding = null): string|bool {}

ext/mbstring/mbstring_arginfo.h

Lines changed: 11 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ext/mbstring/tests/mb_str_pad.phpt

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
--TEST--
2+
mb_str_pad()
3+
--EXTENSIONS--
4+
mbstring
5+
--FILE--
6+
<?php
7+
8+
// Functional test for mb_str_pad(). Each section prints a header followed by
// var_dump() output that is compared verbatim against the --EXPECT-- section.

// Invalid arguments: an empty pad string (for every pad type), an unknown
// pad type and an unknown encoding must each raise a ValueError; only the
// exception message is dumped.
echo "--- Error conditions ---\n";
9+
try {
10+
var_dump(mb_str_pad('▶▶', 6, '', STR_PAD_RIGHT));
11+
} catch (ValueError $e) {
12+
var_dump($e->getMessage());
13+
}
14+
try {
15+
var_dump(mb_str_pad('▶▶', 6, '', STR_PAD_LEFT));
16+
} catch (ValueError $e) {
17+
var_dump($e->getMessage());
18+
}
19+
try {
20+
var_dump(mb_str_pad('▶▶', 6, '', STR_PAD_BOTH));
21+
} catch (ValueError $e) {
22+
var_dump($e->getMessage());
23+
}
24+
try {
25+
var_dump(mb_str_pad('▶▶', 6, ' ', 123456));
26+
} catch (ValueError $e) {
27+
var_dump($e->getMessage());
28+
}
29+
try {
30+
var_dump(mb_str_pad('▶▶', 6, ' ', STR_PAD_BOTH, 'unexisting'));
31+
} catch (ValueError $e) {
32+
var_dump($e->getMessage());
33+
}
34+
35+
// Target lengths equal to, below, or negative relative to the input's
// two-character length: the input is expected back unchanged.
echo "--- Edge cases pad length ---\n";
36+
var_dump(mb_str_pad('▶▶', 2, ' ', STR_PAD_BOTH));
37+
var_dump(mb_str_pad('▶▶', 1, ' ', STR_PAD_BOTH));
38+
var_dump(mb_str_pad('▶▶', 0, ' ', STR_PAD_BOTH));
39+
var_dump(mb_str_pad('▶▶', -1, ' ', STR_PAD_BOTH));
40+
41+
// Same length sweep with an empty input: the result consists of padding only
// (or is empty for lengths of zero and below).
echo "--- Empty input string ---\n";
42+
var_dump(mb_str_pad('', 2, ' ', STR_PAD_BOTH));
43+
var_dump(mb_str_pad('', 1, ' ', STR_PAD_BOTH));
44+
var_dump(mb_str_pad('', 0, ' ', STR_PAD_BOTH));
45+
var_dump(mb_str_pad('', -1, ' ', STR_PAD_BOTH));
46+
47+
// $pad_string is skipped via a named argument, so the default pad string
// (a single space) is used.
echo "--- No default argument ---\n";
48+
var_dump(mb_str_pad('▶▶', 6, pad_type: STR_PAD_RIGHT));
49+
var_dump(mb_str_pad('▶▶', 6, pad_type: STR_PAD_LEFT));
50+
var_dump(mb_str_pad('▶▶', 6, pad_type: STR_PAD_BOTH));
51+
52+
// Three-character pad string with target lengths from 6 down to 1: covers
// whole and partial repetitions of the pad string for every pad type.
echo "--- UTF-8 emojis ---\n";
53+
for ($i = 6; $i > 0; $i--) {
54+
var_dump(mb_str_pad('▶▶', $i, '❤❓❇', STR_PAD_RIGHT));
55+
var_dump(mb_str_pad('▶▶', $i, '❤❓❇', STR_PAD_LEFT));
56+
var_dump(mb_str_pad('▶▶', $i, '❤❓❇', STR_PAD_BOTH));
57+
}
58+
59+
// The same text is converted to UTF-8, UTF-32 and UTF-7, padded in that
// encoding, and converted back to UTF-8 before being dumped, so the expected
// output is identical for all three encodings.
echo "--- UTF-8, 32, 7 test ---\n";
60+
61+
// Taken from mb_substr.phpt
62+
$utf8 = "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь";
63+
$utf32 = mb_convert_encoding($utf8, 'UTF-32', 'UTF-8');
64+
$utf7 = mb_convert_encoding($utf8, 'UTF-7', 'UTF-8');
65+
$tests = ["UTF-8" => $utf8, "UTF-32" => $utf32, "UTF-7" => $utf7];
66+
67+
foreach ($tests as $encoding => $test) {
68+
// The pad string has to be in the same encoding as the string being padded.
$pad_str = mb_convert_encoding('▶▶', $encoding, 'UTF-8');
69+
var_dump(mb_convert_encoding(mb_str_pad($test, 44, $pad_str, STR_PAD_RIGHT, $encoding), 'UTF-8', $encoding));
70+
var_dump(mb_convert_encoding(mb_str_pad($test, 44, $pad_str, STR_PAD_LEFT, $encoding), 'UTF-8', $encoding));
71+
var_dump(mb_convert_encoding(mb_str_pad($test, 44, $pad_str, STR_PAD_BOTH, $encoding), 'UTF-8', $encoding));
72+
}
73+
?>
74+
--EXPECT--
75+
--- Error conditions ---
76+
string(66) "mb_str_pad(): Argument #3 ($pad_string) must be a non-empty string"
77+
string(66) "mb_str_pad(): Argument #3 ($pad_string) must be a non-empty string"
78+
string(66) "mb_str_pad(): Argument #3 ($pad_string) must be a non-empty string"
79+
string(90) "mb_str_pad(): Argument #4 ($pad_type) must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH"
80+
string(82) "mb_str_pad(): Argument #5 ($encoding) must be a valid encoding, "unexisting" given"
81+
--- Edge cases pad length ---
82+
string(6) "▶▶"
83+
string(6) "▶▶"
84+
string(6) "▶▶"
85+
string(6) "▶▶"
86+
--- Empty input string ---
87+
string(2) " "
88+
string(1) " "
89+
string(0) ""
90+
string(0) ""
91+
--- No default argument ---
92+
string(10) "▶▶ "
93+
string(10) " ▶▶"
94+
string(10) " ▶▶ "
95+
--- UTF-8 emojis ---
96+
string(18) "▶▶❤❓❇❤"
97+
string(18) "❤❓❇❤▶▶"
98+
string(18) "❤❓▶▶❤❓"
99+
string(15) "▶▶❤❓❇"
100+
string(15) "❤❓❇▶▶"
101+
string(15) "❤▶▶❤❓"
102+
string(12) "▶▶❤❓"
103+
string(12) "❤❓▶▶"
104+
string(12) "❤▶▶❤"
105+
string(9) "▶▶❤"
106+
string(9) "❤▶▶"
107+
string(9) "▶▶❤"
108+
string(6) "▶▶"
109+
string(6) "▶▶"
110+
string(6) "▶▶"
111+
string(6) "▶▶"
112+
string(6) "▶▶"
113+
string(6) "▶▶"
114+
--- UTF-8, 32, 7 test ---
115+
string(92) "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶▶"
116+
string(92) "▶▶▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь"
117+
string(92) "▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶"
118+
string(92) "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶▶"
119+
string(92) "▶▶▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь"
120+
string(92) "▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶"
121+
string(92) "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶▶"
122+
string(92) "▶▶▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь"
123+
string(92) "▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶"

0 commit comments

Comments
 (0)