Skip to content

Commit 87d5ad0

Browse files
committed
Add bit width length modifier to printf
1 parent dcbb574 commit 87d5ad0

File tree

9 files changed

+215
-18
lines changed

9 files changed

+215
-18
lines changed

libc/docs/dev/printf_behavior.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ If a number passed as a min width or precision value is out of range for an int,
164164
then it will be treated as the largest or smallest value in the int range
165165
(e.g. "%-999999999999.999999999999s" is the same as "%-2147483648.2147483647s").
166166

167+
If a number passed as a bit width is less than or equal to zero, the conversion
168+
is considered invalid. If the provided bit width is larger than the width of
169+
uintmax_t, it will be clamped to the width of uintmax_t.
170+
167171
----------
168172
Conversion
169173
----------

libc/src/stdio/printf_core/converter_utils.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
namespace LIBC_NAMESPACE {
1919
namespace printf_core {
2020

21-
LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
21+
LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num,
22+
LengthSpec length_spec) {
23+
auto [lm, bw] = length_spec;
2224
switch (lm) {
2325
case LengthModifier::none:
2426
return num & cpp::numeric_limits<unsigned int>::max();
@@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
4042
return num & cpp::numeric_limits<uintptr_t>::max();
4143
case LengthModifier::j:
4244
return num; // j is intmax, so no mask is necessary.
45+
case LengthModifier::w:
46+
case LengthModifier::wf: {
47+
uintmax_t mask;
48+
if (bw == 0) {
49+
mask = 0;
50+
} else if (bw < sizeof(uintmax_t) * CHAR_BIT) {
51+
mask = (static_cast<uintmax_t>(1) << bw) - 1;
52+
} else {
53+
mask = UINTMAX_MAX;
54+
}
55+
return num & mask;
56+
}
4357
}
4458
__builtin_unreachable();
4559
}

libc/src/stdio/printf_core/core_structs.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@ namespace printf_core {
2020

2121
// These length modifiers match the length modifiers in the format string, which
2222
// is why they are formatted differently from the rest of the file.
23-
enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
23+
enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none };
24+
25+
struct LengthSpec {
26+
LengthModifier lm;
27+
size_t bit_width;
28+
};
2429

2530
enum FormatFlags : uint8_t {
2631
LEFT_JUSTIFIED = 0x01, // -
@@ -42,6 +47,7 @@ struct FormatSection {
4247
// Format Specifier Values
4348
FormatFlags flags = FormatFlags(0);
4449
LengthModifier length_modifier = LengthModifier::none;
50+
size_t bit_width = 0;
4551
int min_width = 0;
4652
int precision = -1;
4753

@@ -64,6 +70,7 @@ struct FormatSection {
6470
if (!((static_cast<uint8_t>(flags) ==
6571
static_cast<uint8_t>(other.flags)) &&
6672
(min_width == other.min_width) && (precision == other.precision) &&
73+
(bit_width == other.bit_width) &&
6774
(length_modifier == other.length_modifier) &&
6875
(conv_name == other.conv_name)))
6976
return false;

libc/src/stdio/printf_core/int_converter.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
7171
uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
7272
bool is_negative = false;
7373
FormatFlags flags = to_conv.flags;
74-
7574
const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
7675

7776
// If the conversion is signed, then handle negative values.
@@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
8988
~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
9089
}
9190

92-
num = apply_length_modifier(num, to_conv.length_modifier);
93-
91+
num =
92+
apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
9493
cpp::array<char, details::num_buf_size()> buf;
9594
auto str = details::num_to_strview(num, buf, to_conv.conv_name);
9695
if (!str)

libc/src/stdio/printf_core/parser.h

Lines changed: 56 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,10 @@ template <typename ArgProvider> class Parser {
136136
}
137137
}
138138

139-
LengthModifier lm = parse_length_modifier(&cur_pos);
140-
139+
auto [lm, bw] = parse_length_modifier(&cur_pos);
141140
section.length_modifier = lm;
142141
section.conv_name = str[cur_pos];
142+
section.bit_width = bw;
143143
switch (str[cur_pos]) {
144144
case ('%'):
145145
// Regardless of options, a % conversion is always safe. The standard
@@ -188,6 +188,21 @@ template <typename ArgProvider> class Parser {
188188

189189
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index);
190190
break;
191+
192+
case (LengthModifier::w):
193+
case (LengthModifier::wf):
194+
if (bw == 0) {
195+
section.has_conv = false;
196+
} else if (bw <= INT_WIDTH) {
197+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
198+
} else if (bw <= LONG_WIDTH) {
199+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
200+
} else if (bw <= LLONG_WIDTH) {
201+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
202+
} else {
203+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
204+
}
205+
break;
191206
}
192207
break;
193208
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
@@ -273,38 +288,54 @@ template <typename ArgProvider> class Parser {
273288
// assumes that str[*local_pos] is inside a format specifier. It returns a
274289
// LengthSpec with the length modifier it found and, for w/wf, its bit width. It will advance local_pos
275290
// after the length modifier (and any bit width digits) if one is found.
276-
LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
291+
LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
277292
switch (str[*local_pos]) {
278293
case ('l'):
279294
if (str[*local_pos + 1] == 'l') {
280295
*local_pos += 2;
281-
return LengthModifier::ll;
296+
return {LengthModifier::ll, 0};
282297
} else {
283298
++*local_pos;
284-
return LengthModifier::l;
299+
return {LengthModifier::l, 0};
285300
}
301+
case ('w'): {
302+
LengthModifier lm;
303+
if (str[*local_pos + 1] == 'f') {
304+
*local_pos += 2;
305+
lm = LengthModifier::wf;
306+
} else {
307+
++*local_pos;
308+
lm = LengthModifier::w;
309+
}
310+
if (internal::isdigit(str[*local_pos])) {
311+
const auto result = internal::strtointeger<int>(str + *local_pos, 10);
312+
*local_pos += result.parsed_len;
313+
return {lm, static_cast<size_t>(cpp::max(0, result.value))};
314+
}
315+
return {lm, 0};
316+
}
286317
case ('h'):
287318
if (str[*local_pos + 1] == 'h') {
288319
*local_pos += 2;
289-
return LengthModifier::hh;
320+
return {LengthModifier::hh, 0};
290321
} else {
291322
++*local_pos;
292-
return LengthModifier::h;
323+
return {LengthModifier::h, 0};
293324
}
294325
case ('L'):
295326
++*local_pos;
296-
return LengthModifier::L;
327+
return {LengthModifier::L, 0};
297328
case ('j'):
298329
++*local_pos;
299-
return LengthModifier::j;
330+
return {LengthModifier::j, 0};
300331
case ('z'):
301332
++*local_pos;
302-
return LengthModifier::z;
333+
return {LengthModifier::z, 0};
303334
case ('t'):
304335
++*local_pos;
305-
return LengthModifier::t;
336+
return {LengthModifier::t, 0};
306337
default:
307-
return LengthModifier::none;
338+
return {LengthModifier::none, 0};
308339
}
309340
}
310341

@@ -460,7 +491,7 @@ template <typename ArgProvider> class Parser {
460491
}
461492
}
462493

463-
LengthModifier lm = parse_length_modifier(&local_pos);
494+
auto [lm, bw] = parse_length_modifier(&local_pos);
464495

465496
// if we don't have an index for this conversion, then its position is
466497
// unknown and all this information is irrelevant. The rest of this
@@ -511,6 +542,18 @@ template <typename ArgProvider> class Parser {
511542
case (LengthModifier::t):
512543
conv_size = type_desc_from_type<ptrdiff_t>();
513544
break;
545+
case (LengthModifier::w):
546+
case (LengthModifier::wf):
547+
if (bw <= INT_WIDTH) {
548+
conv_size = type_desc_from_type<int>();
549+
} else if (bw <= LONG_WIDTH) {
550+
conv_size = type_desc_from_type<long>();
551+
} else if (bw <= LLONG_WIDTH) {
552+
conv_size = type_desc_from_type<long long>();
553+
} else {
554+
conv_size = type_desc_from_type<intmax_t>();
555+
}
556+
break;
514557
}
515558
break;
516559
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT

libc/src/stdio/printf_core/write_int_converter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer,
5555
*reinterpret_cast<ptrdiff_t *>(to_conv.conv_val_ptr) = written;
5656
break;
5757
case LengthModifier::j:
58+
case LengthModifier::w:
59+
case LengthModifier::wf:
5860
*reinterpret_cast<uintmax_t *>(to_conv.conv_val_ptr) = written;
5961
break;
6062
}

libc/test/UnitTest/PrintfMatcher.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ namespace {
3939
case (LengthModifier::lm): \
4040
tlog << #lm; \
4141
break
42+
#define CASE_LM_BIT_WIDTH(lm, bw) \
43+
case (LengthModifier::lm): \
44+
tlog << #lm << "\n\tbit width: :" << bw; \
45+
break
4246

4347
static void display(FormatSection form) {
4448
tlog << "Raw String (len " << form.raw_string.size() << "): \"";
@@ -67,6 +71,8 @@ static void display(FormatSection form) {
6771
CASE_LM(z);
6872
CASE_LM(t);
6973
CASE_LM(L);
74+
CASE_LM_BIT_WIDTH(w, form.bit_width);
75+
CASE_LM_BIT_WIDTH(wf, form.bit_width);
7076
}
7177
tlog << "\n";
7278
tlog << "\tconversion name: " << form.conv_name << "\n";

libc/test/src/stdio/printf_core/parser_test.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
223223
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
224224
}
225225

226+
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) {
227+
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
228+
const char *str = "%w32d";
229+
long long arg1 = 12345;
230+
evaluate(format_arr, str, arg1);
231+
232+
LIBC_NAMESPACE::printf_core::FormatSection expected;
233+
expected.has_conv = true;
234+
235+
expected.raw_string = {str, 5};
236+
expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w;
237+
expected.bit_width = 32;
238+
expected.conv_val_raw = arg1;
239+
expected.conv_name = 'd';
240+
241+
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
242+
}
243+
244+
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) {
245+
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
246+
const char *str = "%wf32d";
247+
long long arg1 = 12345;
248+
evaluate(format_arr, str, arg1);
249+
250+
LIBC_NAMESPACE::printf_core::FormatSection expected;
251+
expected.has_conv = true;
252+
253+
expected.raw_string = {str, 6};
254+
expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf;
255+
expected.bit_width = 32;
256+
expected.conv_val_raw = arg1;
257+
expected.conv_name = 'd';
258+
259+
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
260+
}
261+
226262
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
227263
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
228264
const char *str = "% -056.78jd";

libc/test/src/stdio/sprintf_test.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,92 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
169169
EXPECT_EQ(written, 20);
170170
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
171171

172+
written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807);
173+
EXPECT_EQ(written, 1);
174+
ASSERT_STREQ(buff, "7");
175+
176+
written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1);
177+
EXPECT_EQ(written, 1);
178+
ASSERT_STREQ(buff, "1");
179+
180+
written = LIBC_NAMESPACE::sprintf(buff, "%w64d", 9223372036854775807ll);
181+
EXPECT_EQ(written, 19);
182+
ASSERT_STREQ(buff, "9223372036854775807");
183+
184+
written = LIBC_NAMESPACE::sprintf(buff, "%w-1d", 5807);
185+
EXPECT_EQ(written, 5);
186+
ASSERT_STREQ(buff, "%w-1d");
187+
188+
written = LIBC_NAMESPACE::sprintf(buff, "%w0d", 5807);
189+
EXPECT_EQ(written, 4);
190+
ASSERT_STREQ(buff, "%w0d");
191+
192+
written = LIBC_NAMESPACE::sprintf(buff, "%w999d", 9223372036854775807ll);
193+
EXPECT_EQ(written, 19);
194+
ASSERT_STREQ(buff, "9223372036854775807");
195+
196+
written = LIBC_NAMESPACE::sprintf(buff, "%winvalid%w1d", 5807, 5807);
197+
EXPECT_EQ(written, 10);
198+
ASSERT_STREQ(buff, "%winvalid1");
199+
200+
written = LIBC_NAMESPACE::sprintf(buff, "%w-1d%w1d", 5807, 5807);
201+
EXPECT_EQ(written, 6);
202+
ASSERT_STREQ(buff, "%w-1d1");
203+
204+
char format[64];
205+
char uintmax[128];
206+
LIBC_NAMESPACE::sprintf(format, "%%w%du", UINTMAX_WIDTH);
207+
const int uintmax_len = LIBC_NAMESPACE::sprintf(uintmax, "%ju", UINTMAX_MAX);
208+
written = LIBC_NAMESPACE::sprintf(buff, format, UINTMAX_MAX);
209+
EXPECT_EQ(written, uintmax_len);
210+
ASSERT_STREQ(buff, uintmax);
211+
212+
written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull);
213+
EXPECT_EQ(written, 20);
214+
ASSERT_STREQ(buff, "18446744073709551615"); // ull max
215+
216+
written =
217+
LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll);
218+
EXPECT_EQ(written, 20);
219+
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
220+
221+
written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807);
222+
EXPECT_EQ(written, 1);
223+
ASSERT_STREQ(buff, "7");
224+
225+
written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1);
226+
EXPECT_EQ(written, 1);
227+
ASSERT_STREQ(buff, "1");
228+
229+
written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull);
230+
EXPECT_EQ(written, 20);
231+
ASSERT_STREQ(buff, "18446744073709551615"); // ull max
232+
233+
written =
234+
LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll);
235+
EXPECT_EQ(written, 20);
236+
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
237+
238+
written = LIBC_NAMESPACE::sprintf(buff, "%wf0d", 5807);
239+
EXPECT_EQ(written, 5);
240+
ASSERT_STREQ(buff, "%wf0d");
241+
242+
written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d", 5807);
243+
EXPECT_EQ(written, 6);
244+
ASSERT_STREQ(buff, "%wf-1d");
245+
246+
written = LIBC_NAMESPACE::sprintf(buff, "%wfinvalid%wf1d", 5807, 5807);
247+
EXPECT_EQ(written, 11);
248+
ASSERT_STREQ(buff, "%wfinvalid1");
249+
250+
written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d%wf1d", 5807, 5807);
251+
EXPECT_EQ(written, 7);
252+
ASSERT_STREQ(buff, "%wf-1d1");
253+
254+
written = LIBC_NAMESPACE::sprintf(buff, "%wf999d", 9223372036854775807ll);
255+
EXPECT_EQ(written, 19);
256+
ASSERT_STREQ(buff, "9223372036854775807");
257+
172258
// Min Width Tests.
173259

174260
written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);

0 commit comments

Comments
 (0)