Skip to content

Commit fdef5c4

Browse files
authored
Add bit width length modifier to printf (#82461)
Resolves #81685. This adds support for wN and wfN length modifiers in fprintf.
1 parent 60deb8b commit fdef5c4

File tree

9 files changed

+215
-18
lines changed

9 files changed

+215
-18
lines changed

libc/docs/dev/printf_behavior.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,10 @@ If a number passed as a min width or precision value is out of range for an int,
173173
then it will be treated as the largest or smallest value in the int range
174174
(e.g. "%-999999999999.999999999999s" is the same as "%-2147483648.2147483647s").
175175

176+
If a number passed as a bit width is less than or equal to zero, the conversion
177+
is considered invalid. If the provided bit width is larger than the width of
178+
uintmax_t, it will be clamped to the width of uintmax_t.
179+
176180
----------
177181
Conversion
178182
----------

libc/src/stdio/printf_core/converter_utils.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
namespace LIBC_NAMESPACE {
1919
namespace printf_core {
2020

21-
LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
21+
LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num,
22+
LengthSpec length_spec) {
23+
auto [lm, bw] = length_spec;
2224
switch (lm) {
2325
case LengthModifier::none:
2426
return num & cpp::numeric_limits<unsigned int>::max();
@@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
4042
return num & cpp::numeric_limits<uintptr_t>::max();
4143
case LengthModifier::j:
4244
return num; // j is intmax, so no mask is necessary.
45+
case LengthModifier::w:
46+
case LengthModifier::wf: {
47+
uintmax_t mask;
48+
if (bw == 0) {
49+
mask = 0;
50+
} else if (bw < sizeof(uintmax_t) * CHAR_BIT) {
51+
mask = (static_cast<uintmax_t>(1) << bw) - 1;
52+
} else {
53+
mask = UINTMAX_MAX;
54+
}
55+
return num & mask;
56+
}
4357
}
4458
__builtin_unreachable();
4559
}

libc/src/stdio/printf_core/core_structs.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,12 @@ namespace printf_core {
2222

2323
// These length modifiers match the length modifiers in the format string, which
2424
// is why they are formatted differently from the rest of the file.
25-
enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
25+
enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none };
26+
27+
struct LengthSpec {
28+
LengthModifier lm;
29+
size_t bit_width;
30+
};
2631

2732
enum FormatFlags : uint8_t {
2833
LEFT_JUSTIFIED = 0x01, // -
@@ -44,6 +49,7 @@ struct FormatSection {
4449
// Format Specifier Values
4550
FormatFlags flags = FormatFlags(0);
4651
LengthModifier length_modifier = LengthModifier::none;
52+
size_t bit_width = 0;
4753
int min_width = 0;
4854
int precision = -1;
4955

@@ -66,6 +72,7 @@ struct FormatSection {
6672
if (!((static_cast<uint8_t>(flags) ==
6773
static_cast<uint8_t>(other.flags)) &&
6874
(min_width == other.min_width) && (precision == other.precision) &&
75+
(bit_width == other.bit_width) &&
6976
(length_modifier == other.length_modifier) &&
7077
(conv_name == other.conv_name)))
7178
return false;

libc/src/stdio/printf_core/int_converter.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
7171
uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
7272
bool is_negative = false;
7373
FormatFlags flags = to_conv.flags;
74-
7574
const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
7675

7776
// If the conversion is signed, then handle negative values.
@@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
8988
~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
9089
}
9190

92-
num = apply_length_modifier(num, to_conv.length_modifier);
93-
91+
num =
92+
apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
9493
cpp::array<char, details::num_buf_size()> buf;
9594
auto str = details::num_to_strview(num, buf, to_conv.conv_name);
9695
if (!str)

libc/src/stdio/printf_core/parser.h

Lines changed: 56 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,10 @@ template <typename ArgProvider> class Parser {
150150
}
151151
}
152152

153-
LengthModifier lm = parse_length_modifier(&cur_pos);
154-
153+
auto [lm, bw] = parse_length_modifier(&cur_pos);
155154
section.length_modifier = lm;
156155
section.conv_name = str[cur_pos];
156+
section.bit_width = bw;
157157
switch (str[cur_pos]) {
158158
case ('%'):
159159
// Regardless of options, a % conversion is always safe. The standard
@@ -202,6 +202,21 @@ template <typename ArgProvider> class Parser {
202202

203203
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index);
204204
break;
205+
206+
case (LengthModifier::w):
207+
case (LengthModifier::wf):
208+
if (bw == 0) {
209+
section.has_conv = false;
210+
} else if (bw <= INT_WIDTH) {
211+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
212+
} else if (bw <= LONG_WIDTH) {
213+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
214+
} else if (bw <= LLONG_WIDTH) {
215+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
216+
} else {
217+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
218+
}
219+
break;
205220
}
206221
break;
207222
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
@@ -306,38 +321,54 @@ template <typename ArgProvider> class Parser {
306321
// assumes that str[*local_pos] is inside a format specifier. It returns a
307322
// LengthSpec with the length modifier and bit width it found. It will advance local_pos
308323
// after the length modifier if one is found.
309-
LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
324+
LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
310325
switch (str[*local_pos]) {
311326
case ('l'):
312327
if (str[*local_pos + 1] == 'l') {
313328
*local_pos += 2;
314-
return LengthModifier::ll;
329+
return {LengthModifier::ll, 0};
330+
} else {
331+
++*local_pos;
332+
return {LengthModifier::l, 0};
333+
}
334+
case ('w'): {
335+
LengthModifier lm;
336+
if (str[*local_pos + 1] == 'f') {
337+
*local_pos += 2;
338+
lm = LengthModifier::wf;
315339
} else {
316340
++*local_pos;
317-
return LengthModifier::l;
341+
lm = LengthModifier::w;
318342
}
343+
if (internal::isdigit(str[*local_pos])) {
344+
const auto result = internal::strtointeger<int>(str + *local_pos, 10);
345+
*local_pos += result.parsed_len;
346+
return {lm, static_cast<size_t>(cpp::max(0, result.value))};
347+
}
348+
return {lm, 0};
349+
}
319350
case ('h'):
320351
if (str[*local_pos + 1] == 'h') {
321352
*local_pos += 2;
322-
return LengthModifier::hh;
353+
return {LengthModifier::hh, 0};
323354
} else {
324355
++*local_pos;
325-
return LengthModifier::h;
356+
return {LengthModifier::h, 0};
326357
}
327358
case ('L'):
328359
++*local_pos;
329-
return LengthModifier::L;
360+
return {LengthModifier::L, 0};
330361
case ('j'):
331362
++*local_pos;
332-
return LengthModifier::j;
363+
return {LengthModifier::j, 0};
333364
case ('z'):
334365
++*local_pos;
335-
return LengthModifier::z;
366+
return {LengthModifier::z, 0};
336367
case ('t'):
337368
++*local_pos;
338-
return LengthModifier::t;
369+
return {LengthModifier::t, 0};
339370
default:
340-
return LengthModifier::none;
371+
return {LengthModifier::none, 0};
341372
}
342373
}
343374

@@ -509,7 +540,7 @@ template <typename ArgProvider> class Parser {
509540
}
510541
}
511542

512-
LengthModifier lm = parse_length_modifier(&local_pos);
543+
auto [lm, bw] = parse_length_modifier(&local_pos);
513544

514545
// if we don't have an index for this conversion, then its position is
515546
// unknown and all this information is irrelevant. The rest of this
@@ -560,6 +591,18 @@ template <typename ArgProvider> class Parser {
560591
case (LengthModifier::t):
561592
conv_size = type_desc_from_type<ptrdiff_t>();
562593
break;
594+
case (LengthModifier::w):
595+
case (LengthModifier::wf):
596+
if (bw <= INT_WIDTH) {
597+
conv_size = type_desc_from_type<int>();
598+
} else if (bw <= LONG_WIDTH) {
599+
conv_size = type_desc_from_type<long>();
600+
} else if (bw <= LLONG_WIDTH) {
601+
conv_size = type_desc_from_type<long long>();
602+
} else {
603+
conv_size = type_desc_from_type<intmax_t>();
604+
}
605+
break;
563606
}
564607
break;
565608
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT

libc/src/stdio/printf_core/write_int_converter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer,
5555
*reinterpret_cast<ptrdiff_t *>(to_conv.conv_val_ptr) = written;
5656
break;
5757
case LengthModifier::j:
58+
case LengthModifier::w:
59+
case LengthModifier::wf:
5860
*reinterpret_cast<uintmax_t *>(to_conv.conv_val_ptr) = written;
5961
break;
6062
}

libc/test/UnitTest/PrintfMatcher.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ namespace {
3939
case (LengthModifier::lm): \
4040
tlog << #lm; \
4141
break
42+
#define CASE_LM_BIT_WIDTH(lm, bw) \
43+
case (LengthModifier::lm): \
44+
tlog << #lm << "\n\tbit width: :" << bw; \
45+
break
4246

4347
static void display(FormatSection form) {
4448
tlog << "Raw String (len " << form.raw_string.size() << "): \"";
@@ -67,6 +71,8 @@ static void display(FormatSection form) {
6771
CASE_LM(z);
6872
CASE_LM(t);
6973
CASE_LM(L);
74+
CASE_LM_BIT_WIDTH(w, form.bit_width);
75+
CASE_LM_BIT_WIDTH(wf, form.bit_width);
7076
}
7177
tlog << "\n";
7278
tlog << "\tconversion name: " << form.conv_name << "\n";

libc/test/src/stdio/printf_core/parser_test.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
223223
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
224224
}
225225

226+
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) {
227+
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
228+
const char *str = "%w32d";
229+
long long arg1 = 12345;
230+
evaluate(format_arr, str, arg1);
231+
232+
LIBC_NAMESPACE::printf_core::FormatSection expected;
233+
expected.has_conv = true;
234+
235+
expected.raw_string = {str, 5};
236+
expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w;
237+
expected.bit_width = 32;
238+
expected.conv_val_raw = arg1;
239+
expected.conv_name = 'd';
240+
241+
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
242+
}
243+
244+
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) {
245+
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
246+
const char *str = "%wf32d";
247+
long long arg1 = 12345;
248+
evaluate(format_arr, str, arg1);
249+
250+
LIBC_NAMESPACE::printf_core::FormatSection expected;
251+
expected.has_conv = true;
252+
253+
expected.raw_string = {str, 6};
254+
expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf;
255+
expected.bit_width = 32;
256+
expected.conv_val_raw = arg1;
257+
expected.conv_name = 'd';
258+
259+
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
260+
}
261+
226262
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
227263
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
228264
const char *str = "% -056.78jd";

libc/test/src/stdio/sprintf_test.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,92 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
169169
EXPECT_EQ(written, 20);
170170
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
171171

172+
written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807);
173+
EXPECT_EQ(written, 1);
174+
ASSERT_STREQ(buff, "7");
175+
176+
written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1);
177+
EXPECT_EQ(written, 1);
178+
ASSERT_STREQ(buff, "1");
179+
180+
written = LIBC_NAMESPACE::sprintf(buff, "%w64d", 9223372036854775807ll);
181+
EXPECT_EQ(written, 19);
182+
ASSERT_STREQ(buff, "9223372036854775807");
183+
184+
written = LIBC_NAMESPACE::sprintf(buff, "%w-1d", 5807);
185+
EXPECT_EQ(written, 5);
186+
ASSERT_STREQ(buff, "%w-1d");
187+
188+
written = LIBC_NAMESPACE::sprintf(buff, "%w0d", 5807);
189+
EXPECT_EQ(written, 4);
190+
ASSERT_STREQ(buff, "%w0d");
191+
192+
written = LIBC_NAMESPACE::sprintf(buff, "%w999d", 9223372036854775807ll);
193+
EXPECT_EQ(written, 19);
194+
ASSERT_STREQ(buff, "9223372036854775807");
195+
196+
written = LIBC_NAMESPACE::sprintf(buff, "%winvalid%w1d", 5807, 5807);
197+
EXPECT_EQ(written, 10);
198+
ASSERT_STREQ(buff, "%winvalid1");
199+
200+
written = LIBC_NAMESPACE::sprintf(buff, "%w-1d%w1d", 5807, 5807);
201+
EXPECT_EQ(written, 6);
202+
ASSERT_STREQ(buff, "%w-1d1");
203+
204+
char format[64];
205+
char uintmax[128];
206+
LIBC_NAMESPACE::sprintf(format, "%%w%du", UINTMAX_WIDTH);
207+
const int uintmax_len = LIBC_NAMESPACE::sprintf(uintmax, "%ju", UINTMAX_MAX);
208+
written = LIBC_NAMESPACE::sprintf(buff, format, UINTMAX_MAX);
209+
EXPECT_EQ(written, uintmax_len);
210+
ASSERT_STREQ(buff, uintmax);
211+
212+
written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull);
213+
EXPECT_EQ(written, 20);
214+
ASSERT_STREQ(buff, "18446744073709551615"); // ull max
215+
216+
written =
217+
LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll);
218+
EXPECT_EQ(written, 20);
219+
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
220+
221+
written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807);
222+
EXPECT_EQ(written, 1);
223+
ASSERT_STREQ(buff, "7");
224+
225+
written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1);
226+
EXPECT_EQ(written, 1);
227+
ASSERT_STREQ(buff, "1");
228+
229+
written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull);
230+
EXPECT_EQ(written, 20);
231+
ASSERT_STREQ(buff, "18446744073709551615"); // ull max
232+
233+
written =
234+
LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll);
235+
EXPECT_EQ(written, 20);
236+
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
237+
238+
written = LIBC_NAMESPACE::sprintf(buff, "%wf0d", 5807);
239+
EXPECT_EQ(written, 5);
240+
ASSERT_STREQ(buff, "%wf0d");
241+
242+
written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d", 5807);
243+
EXPECT_EQ(written, 6);
244+
ASSERT_STREQ(buff, "%wf-1d");
245+
246+
written = LIBC_NAMESPACE::sprintf(buff, "%wfinvalid%wf1d", 5807, 5807);
247+
EXPECT_EQ(written, 11);
248+
ASSERT_STREQ(buff, "%wfinvalid1");
249+
250+
written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d%wf1d", 5807, 5807);
251+
EXPECT_EQ(written, 7);
252+
ASSERT_STREQ(buff, "%wf-1d1");
253+
254+
written = LIBC_NAMESPACE::sprintf(buff, "%wf999d", 9223372036854775807ll);
255+
EXPECT_EQ(written, 19);
256+
ASSERT_STREQ(buff, "9223372036854775807");
257+
172258
// Min Width Tests.
173259

174260
written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);

0 commit comments

Comments
 (0)