Skip to content

Commit a507c2e

Browse files
committed
Add bit width length modifier to printf
1 parent dcbb574 commit a507c2e

File tree

8 files changed

+191
-18
lines changed

8 files changed

+191
-18
lines changed

libc/src/stdio/printf_core/converter_utils.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
namespace LIBC_NAMESPACE {
1919
namespace printf_core {
2020

21-
LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
21+
LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num,
22+
LengthSpec length_spec) {
23+
auto [lm, bw] = length_spec;
2224
switch (lm) {
2325
case LengthModifier::none:
2426
return num & cpp::numeric_limits<unsigned int>::max();
@@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
4042
return num & cpp::numeric_limits<uintptr_t>::max();
4143
case LengthModifier::j:
4244
return num; // j is intmax, so no mask is necessary.
45+
case LengthModifier::w:
46+
case LengthModifier::wf: {
47+
uintmax_t mask;
48+
if (bw == 0) {
49+
mask = 0;
50+
} else if (bw < sizeof(uintmax_t) * CHAR_BIT) {
51+
mask = (static_cast<uintmax_t>(1) << bw) - 1;
52+
} else {
53+
mask = UINTMAX_MAX;
54+
}
55+
return num & mask;
56+
}
4357
}
4458
__builtin_unreachable();
4559
}

libc/src/stdio/printf_core/core_structs.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@ namespace printf_core {
2020

2121
// These length modifiers match the length modifiers in the format string, which
2222
// is why they are formatted differently from the rest of the file.
23-
enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
23+
enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none };
24+
25+
// Pairs a parsed length modifier with the bit width that accompanies it, so
// both can be returned and passed around as a single value.
struct LengthSpec {
26+
// The length modifier found in the format string.
LengthModifier lm;
27+
// Bit width attached to the modifier. parse_length_modifier fills this from
// the number following w/wf and uses 0 for every other modifier.
size_t bit_width;
28+
};
2429

2530
enum FormatFlags : uint8_t {
2631
LEFT_JUSTIFIED = 0x01, // -
@@ -42,6 +47,7 @@ struct FormatSection {
4247
// Format Specifier Values
4348
FormatFlags flags = FormatFlags(0);
4449
LengthModifier length_modifier = LengthModifier::none;
50+
size_t bit_width = 0;
4551
int min_width = 0;
4652
int precision = -1;
4753

@@ -64,6 +70,7 @@ struct FormatSection {
6470
if (!((static_cast<uint8_t>(flags) ==
6571
static_cast<uint8_t>(other.flags)) &&
6672
(min_width == other.min_width) && (precision == other.precision) &&
73+
(bit_width == other.bit_width) &&
6774
(length_modifier == other.length_modifier) &&
6875
(conv_name == other.conv_name)))
6976
return false;

libc/src/stdio/printf_core/int_converter.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
7171
uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
7272
bool is_negative = false;
7373
FormatFlags flags = to_conv.flags;
74-
7574
const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
7675

7776
// If the conversion is signed, then handle negative values.
@@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
8988
~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
9089
}
9190

92-
num = apply_length_modifier(num, to_conv.length_modifier);
93-
91+
num =
92+
apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
9493
cpp::array<char, details::num_buf_size()> buf;
9594
auto str = details::num_to_strview(num, buf, to_conv.conv_name);
9695
if (!str)

libc/src/stdio/printf_core/parser.h

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,10 @@ template <typename ArgProvider> class Parser {
136136
}
137137
}
138138

139-
LengthModifier lm = parse_length_modifier(&cur_pos);
140-
139+
auto [lm, bw] = parse_length_modifier(&cur_pos);
141140
section.length_modifier = lm;
142141
section.conv_name = str[cur_pos];
142+
section.bit_width = bw;
143143
switch (str[cur_pos]) {
144144
case ('%'):
145145
// Regardless of options, a % conversion is always safe. The standard
@@ -188,6 +188,19 @@ template <typename ArgProvider> class Parser {
188188

189189
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index);
190190
break;
191+
192+
case (LengthModifier::w):
193+
case (LengthModifier::wf):
194+
if (bw <= INT_WIDTH) {
195+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
196+
} else if (bw <= LONG_WIDTH) {
197+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
198+
} else if (bw <= LLONG_WIDTH) {
199+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
200+
} else {
201+
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
202+
}
203+
break;
191204
}
192205
break;
193206
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
@@ -273,38 +286,51 @@ template <typename ArgProvider> class Parser {
273286
// assumes that str[*local_pos] is inside a format specifier. It returns a
274287
// LengthSpec with the length modifier it found and, for w/wf, the bit width
275288
// that follows it (0 for every other modifier). It will advance local_pos
// past the length modifier if one is found.
276-
LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
289+
LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
277290
switch (str[*local_pos]) {
278291
case ('l'):
279292
if (str[*local_pos + 1] == 'l') {
280293
*local_pos += 2;
281-
return LengthModifier::ll;
294+
return {LengthModifier::ll, 0};
295+
} else {
296+
++*local_pos;
297+
return {LengthModifier::l, 0};
298+
}
299+
case ('w'): {
300+
LengthModifier lm;
301+
if (str[*local_pos + 1] == 'f') {
302+
*local_pos += 2;
303+
lm = LengthModifier::wf;
282304
} else {
283305
++*local_pos;
284-
return LengthModifier::l;
306+
lm = LengthModifier::w;
285307
}
308+
const auto result = internal::strtointeger<int>(str + *local_pos, 10);
309+
*local_pos += result.parsed_len;
310+
return {lm, static_cast<size_t>(cpp::max(0, result.value))};
311+
}
286312
case ('h'):
287313
if (str[*local_pos + 1] == 'h') {
288314
*local_pos += 2;
289-
return LengthModifier::hh;
315+
return {LengthModifier::hh, 0};
290316
} else {
291317
++*local_pos;
292-
return LengthModifier::h;
318+
return {LengthModifier::h, 0};
293319
}
294320
case ('L'):
295321
++*local_pos;
296-
return LengthModifier::L;
322+
return {LengthModifier::L, 0};
297323
case ('j'):
298324
++*local_pos;
299-
return LengthModifier::j;
325+
return {LengthModifier::j, 0};
300326
case ('z'):
301327
++*local_pos;
302-
return LengthModifier::z;
328+
return {LengthModifier::z, 0};
303329
case ('t'):
304330
++*local_pos;
305-
return LengthModifier::t;
331+
return {LengthModifier::t, 0};
306332
default:
307-
return LengthModifier::none;
333+
return {LengthModifier::none, 0};
308334
}
309335
}
310336

@@ -460,7 +486,7 @@ template <typename ArgProvider> class Parser {
460486
}
461487
}
462488

463-
LengthModifier lm = parse_length_modifier(&local_pos);
489+
auto [lm, bw] = parse_length_modifier(&local_pos);
464490

465491
// if we don't have an index for this conversion, then its position is
466492
// unknown and all this information is irrelevant. The rest of this
@@ -511,6 +537,18 @@ template <typename ArgProvider> class Parser {
511537
case (LengthModifier::t):
512538
conv_size = type_desc_from_type<ptrdiff_t>();
513539
break;
540+
case (LengthModifier::w):
541+
case (LengthModifier::wf):
542+
if (bw <= INT_WIDTH) {
543+
conv_size = type_desc_from_type<int>();
544+
} else if (bw <= LONG_WIDTH) {
545+
conv_size = type_desc_from_type<long>();
546+
} else if (bw <= LLONG_WIDTH) {
547+
conv_size = type_desc_from_type<long long>();
548+
} else {
549+
conv_size = type_desc_from_type<intmax_t>();
550+
}
551+
break;
514552
}
515553
break;
516554
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT

libc/src/stdio/printf_core/write_int_converter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer,
5555
*reinterpret_cast<ptrdiff_t *>(to_conv.conv_val_ptr) = written;
5656
break;
5757
case LengthModifier::j:
58+
case LengthModifier::w:
59+
case LengthModifier::wf:
5860
*reinterpret_cast<uintmax_t *>(to_conv.conv_val_ptr) = written;
5961
break;
6062
}

libc/test/UnitTest/PrintfMatcher.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ namespace {
3939
case (LengthModifier::lm): \
4040
tlog << #lm; \
4141
break
42+
// Switch case for a length modifier that carries a bit width (w and wf): logs
// the modifier's name, then its bit width on a new, tab-indented line.
//   lm: the LengthModifier enumerator name (also used as the logged text).
//   bw: the bit width value to log.
#define CASE_LM_BIT_WIDTH(lm, bw) \
  case (LengthModifier::lm): \
    tlog << #lm << "\n\tbit width: " << bw; \
    break
4246

4347
static void display(FormatSection form) {
4448
tlog << "Raw String (len " << form.raw_string.size() << "): \"";
@@ -67,6 +71,8 @@ static void display(FormatSection form) {
6771
CASE_LM(z);
6872
CASE_LM(t);
6973
CASE_LM(L);
74+
CASE_LM_BIT_WIDTH(w, form.bit_width);
75+
CASE_LM_BIT_WIDTH(wf, form.bit_width);
7076
}
7177
tlog << "\n";
7278
tlog << "\tconversion name: " << form.conv_name << "\n";

libc/test/src/stdio/printf_core/parser_test.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
223223
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
224224
}
225225

226+
// Checks that "%w32d" parses into a single conversion section carrying the w
// length modifier, a bit width of 32, and the supplied argument as its value.
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) {
  LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
  const char *str = "%w32d";
  // For w/wf the parser reads an int whenever the bit width is at most
  // INT_WIDTH, so the argument is passed as an int to match the type that is
  // read back. NOTE(review): assumes int is at least 32 bits wide and that
  // evaluate forwards its arguments through varargs -- confirm.
  int arg1 = 12345;
  evaluate(format_arr, str, arg1);

  LIBC_NAMESPACE::printf_core::FormatSection expected;
  expected.has_conv = true;

  // The whole 5-character string is the conversion specifier.
  expected.raw_string = {str, 5};
  expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w;
  expected.bit_width = 32;
  expected.conv_val_raw = arg1;
  expected.conv_name = 'd';

  ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}
243+
244+
// Checks that "%wf32d" parses into a single conversion section carrying the wf
// length modifier, a bit width of 32, and the supplied argument as its value.
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) {
  LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
  const char *str = "%wf32d";
  // The parser handles wf exactly like w: it reads an int whenever the bit
  // width is at most INT_WIDTH, so the argument is passed as an int to match.
  // NOTE(review): assumes int is at least 32 bits wide and that evaluate
  // forwards its arguments through varargs -- confirm.
  int arg1 = 12345;
  evaluate(format_arr, str, arg1);

  LIBC_NAMESPACE::printf_core::FormatSection expected;
  expected.has_conv = true;

  // The whole 6-character string is the conversion specifier.
  expected.raw_string = {str, 6};
  expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf;
  expected.bit_width = 32;
  expected.conv_val_raw = arg1;
  expected.conv_name = 'd';

  ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}
261+
226262
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
227263
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
228264
const char *str = "% -056.78jd";

libc/test/src/stdio/sprintf_test.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "test/UnitTest/RoundingModeUtils.h"
1313
#include "test/UnitTest/Test.h"
1414
#include <inttypes.h>
15+
#include <src/string/strlen.h>
1516

1617
// TODO: Add a comment here explaining the printf format string.
1718

@@ -169,6 +170,76 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
169170
EXPECT_EQ(written, 20);
170171
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
171172

173+
written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807);
174+
EXPECT_EQ(written, 1);
175+
ASSERT_STREQ(buff, "7");
176+
177+
written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1);
178+
EXPECT_EQ(written, 1);
179+
ASSERT_STREQ(buff, "1");
180+
181+
written = LIBC_NAMESPACE::sprintf(buff, "%w64d", 9223372036854775807l);
182+
EXPECT_EQ(written, 19);
183+
ASSERT_STREQ(buff, "9223372036854775807");
184+
185+
written = LIBC_NAMESPACE::sprintf(buff, "%w-1d", 5807);
186+
EXPECT_EQ(written, 1);
187+
ASSERT_STREQ(buff, "0");
188+
189+
written = LIBC_NAMESPACE::sprintf(buff, "%w0d", 5807);
190+
EXPECT_EQ(written, 1);
191+
ASSERT_STREQ(buff, "0");
192+
193+
written = LIBC_NAMESPACE::sprintf(buff, "%w999d", 9223372036854775807l);
194+
EXPECT_EQ(written, 19);
195+
ASSERT_STREQ(buff, "9223372036854775807");
196+
197+
char format[64];
198+
char uintmax[128];
199+
LIBC_NAMESPACE::sprintf(format, "%%w%du", UINTMAX_WIDTH);
200+
const int uintmax_len = LIBC_NAMESPACE::sprintf(uintmax, "%ju", UINTMAX_MAX);
201+
written = LIBC_NAMESPACE::sprintf(buff, format, UINTMAX_MAX);
202+
EXPECT_EQ(written, uintmax_len);
203+
ASSERT_STREQ(buff, uintmax);
204+
205+
written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull);
206+
EXPECT_EQ(written, 20);
207+
ASSERT_STREQ(buff, "18446744073709551615"); // ull max
208+
209+
written =
210+
LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll);
211+
EXPECT_EQ(written, 20);
212+
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
213+
214+
written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807);
215+
EXPECT_EQ(written, 1);
216+
ASSERT_STREQ(buff, "7");
217+
218+
written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1);
219+
EXPECT_EQ(written, 1);
220+
ASSERT_STREQ(buff, "1");
221+
222+
written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull);
223+
EXPECT_EQ(written, 20);
224+
ASSERT_STREQ(buff, "18446744073709551615"); // ull max
225+
226+
written =
227+
LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll);
228+
EXPECT_EQ(written, 20);
229+
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
230+
231+
written = LIBC_NAMESPACE::sprintf(buff, "%wf0d", 5807);
232+
EXPECT_EQ(written, 1);
233+
ASSERT_STREQ(buff, "0");
234+
235+
written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d", 5807);
236+
EXPECT_EQ(written, 1);
237+
ASSERT_STREQ(buff, "0");
238+
239+
written = LIBC_NAMESPACE::sprintf(buff, "%wf999d", 5807);
240+
EXPECT_EQ(written, 4);
241+
ASSERT_STREQ(buff, "5807");
242+
172243
// Min Width Tests.
173244

174245
written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);

0 commit comments

Comments
 (0)