-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Add bit width length modifier to printf #82461
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write If you have received no comments on your PR for a week, you can request a review If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-libc Author: Om Prakaash (omprakaash) ChangesResolves #81685. This adds support for wN and wfN length modifiers in fprintf. Full diff: https://github.com/llvm/llvm-project/pull/82461.diff 8 Files Affected:
diff --git a/libc/src/stdio/printf_core/converter_utils.h b/libc/src/stdio/printf_core/converter_utils.h
index 54f0a870d0ac4a..30eab6c4af8892 100644
--- a/libc/src/stdio/printf_core/converter_utils.h
+++ b/libc/src/stdio/printf_core/converter_utils.h
@@ -18,7 +18,9 @@
namespace LIBC_NAMESPACE {
namespace printf_core {
-LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
+LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num,
+ LengthSpec length_spec) {
+ auto [lm, bw] = length_spec;
switch (lm) {
case LengthModifier::none:
return num & cpp::numeric_limits<unsigned int>::max();
@@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
return num & cpp::numeric_limits<uintptr_t>::max();
case LengthModifier::j:
return num; // j is intmax, so no mask is necessary.
+ case LengthModifier::w:
+ case LengthModifier::wf: {
+ uintmax_t mask;
+ if (bw == 0) {
+ mask = 0;
+ } else if (bw < sizeof(uintmax_t) * 8) {
+ mask = (static_cast<uintmax_t>(1) << bw) - 1;
+ } else {
+ mask = UINTMAX_MAX;
+ }
+ return num & mask;
+ }
}
__builtin_unreachable();
}
diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index 7634d45568ab84..cd8bceb900de74 100644
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -20,7 +20,12 @@ namespace printf_core {
// These length modifiers match the length modifiers in the format string, which
// is why they are formatted differently from the rest of the file.
-enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
+enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none };
+
+struct LengthSpec {
+ LengthModifier lm;
+ size_t bit_width;
+};
enum FormatFlags : uint8_t {
LEFT_JUSTIFIED = 0x01, // -
@@ -42,6 +47,7 @@ struct FormatSection {
// Format Specifier Values
FormatFlags flags = FormatFlags(0);
LengthModifier length_modifier = LengthModifier::none;
+ size_t bit_width = 0;
int min_width = 0;
int precision = -1;
@@ -64,6 +70,7 @@ struct FormatSection {
if (!((static_cast<uint8_t>(flags) ==
static_cast<uint8_t>(other.flags)) &&
(min_width == other.min_width) && (precision == other.precision) &&
+ (bit_width == other.bit_width) &&
(length_modifier == other.length_modifier) &&
(conv_name == other.conv_name)))
return false;
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index 2efbf53d409382..496e7bd1a56d94 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
bool is_negative = false;
FormatFlags flags = to_conv.flags;
-
const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
// If the conversion is signed, then handle negative values.
@@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
}
- num = apply_length_modifier(num, to_conv.length_modifier);
-
+ num =
+ apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
cpp::array<char, details::num_buf_size()> buf;
auto str = details::num_to_strview(num, buf, to_conv.conv_name);
if (!str)
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index 1e7d2e58c924a6..7a8a9b879dad84 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -136,10 +136,10 @@ template <typename ArgProvider> class Parser {
}
}
- LengthModifier lm = parse_length_modifier(&cur_pos);
-
+ auto [lm, bw] = parse_length_modifier(&cur_pos);
section.length_modifier = lm;
section.conv_name = str[cur_pos];
+ section.bit_width = bw;
switch (str[cur_pos]) {
case ('%'):
// Regardless of options, a % conversion is always safe. The standard
@@ -177,6 +177,8 @@ template <typename ArgProvider> class Parser {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
break;
case (LengthModifier::j):
+ case (LengthModifier::w):
+ case (LengthModifier::wf):
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
break;
@@ -273,38 +275,51 @@ template <typename ArgProvider> class Parser {
// assumes that str[*local_pos] is inside a format specifier. It returns a
// LengthModifier with the length modifier it found. It will advance local_pos
// after the format specifier if one is found.
- LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
+ LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
switch (str[*local_pos]) {
case ('l'):
if (str[*local_pos + 1] == 'l') {
*local_pos += 2;
- return LengthModifier::ll;
+ return {LengthModifier::ll, 0};
+ } else {
+ ++*local_pos;
+ return {LengthModifier::l, 0};
+ }
+ case ('w'): {
+ LengthModifier lm;
+ if (str[*local_pos + 1] == 'f') {
+ *local_pos += 2;
+ lm = LengthModifier::wf;
} else {
++*local_pos;
- return LengthModifier::l;
+ lm = LengthModifier::w;
}
+ const auto result = internal::strtointeger<size_t>(str + *local_pos, 10);
+ *local_pos += result.parsed_len;
+ return {lm, result.value};
+ }
case ('h'):
if (str[*local_pos + 1] == 'h') {
*local_pos += 2;
- return LengthModifier::hh;
+ return {LengthModifier::hh, 0};
} else {
++*local_pos;
- return LengthModifier::h;
+ return {LengthModifier::h, 0};
}
case ('L'):
++*local_pos;
- return LengthModifier::L;
+ return {LengthModifier::L, 0};
case ('j'):
++*local_pos;
- return LengthModifier::j;
+ return {LengthModifier::j, 0};
case ('z'):
++*local_pos;
- return LengthModifier::z;
+ return {LengthModifier::z, 0};
case ('t'):
++*local_pos;
- return LengthModifier::t;
+ return {LengthModifier::t, 0};
default:
- return LengthModifier::none;
+ return {LengthModifier::none, 0};
}
}
@@ -460,7 +475,7 @@ template <typename ArgProvider> class Parser {
}
}
- LengthModifier lm = parse_length_modifier(&local_pos);
+ auto [lm, bw] = parse_length_modifier(&local_pos);
// if we don't have an index for this conversion, then its position is
// unknown and all this information is irrelevant. The rest of this
@@ -503,6 +518,8 @@ template <typename ArgProvider> class Parser {
conv_size = type_desc_from_type<long long>();
break;
case (LengthModifier::j):
+ case (LengthModifier::w):
+ case (LengthModifier::wf):
conv_size = type_desc_from_type<intmax_t>();
break;
case (LengthModifier::z):
diff --git a/libc/src/stdio/printf_core/write_int_converter.h b/libc/src/stdio/printf_core/write_int_converter.h
index 0310905f36f146..18aa5c79897ec4 100644
--- a/libc/src/stdio/printf_core/write_int_converter.h
+++ b/libc/src/stdio/printf_core/write_int_converter.h
@@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer,
*reinterpret_cast<ptrdiff_t *>(to_conv.conv_val_ptr) = written;
break;
case LengthModifier::j:
+ case LengthModifier::w:
+ case LengthModifier::wf:
*reinterpret_cast<uintmax_t *>(to_conv.conv_val_ptr) = written;
break;
}
diff --git a/libc/test/UnitTest/PrintfMatcher.cpp b/libc/test/UnitTest/PrintfMatcher.cpp
index 32f3be73307e3a..c8303815c92296 100644
--- a/libc/test/UnitTest/PrintfMatcher.cpp
+++ b/libc/test/UnitTest/PrintfMatcher.cpp
@@ -39,6 +39,10 @@ namespace {
case (LengthModifier::lm): \
tlog << #lm; \
break
+#define CASE_LM_BIT_WIDTH(lm, bw) \
+ case (LengthModifier::lm): \
+ tlog << #lm << "\n\tbit width: :" << bw; \
+ break
static void display(FormatSection form) {
tlog << "Raw String (len " << form.raw_string.size() << "): \"";
@@ -67,6 +71,8 @@ static void display(FormatSection form) {
CASE_LM(z);
CASE_LM(t);
CASE_LM(L);
+ CASE_LM_BIT_WIDTH(w, form.bit_width);
+ CASE_LM_BIT_WIDTH(wf, form.bit_width);
}
tlog << "\n";
tlog << "\tconversion name: " << form.conv_name << "\n";
diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
index 0134277c4a1b2d..66d6dd0a86c422 100644
--- a/libc/test/src/stdio/printf_core/parser_test.cpp
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) {
+ LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+ const char *str = "%w32d";
+ long long arg1 = 12345;
+ evaluate(format_arr, str, arg1);
+
+ LIBC_NAMESPACE::printf_core::FormatSection expected;
+ expected.has_conv = true;
+
+ expected.raw_string = {str, 5};
+ expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w;
+ expected.bit_width = 32;
+ expected.conv_val_raw = arg1;
+ expected.conv_name = 'd';
+
+ ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) {
+ LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+ const char *str = "%wf32d";
+ long long arg1 = 12345;
+ evaluate(format_arr, str, arg1);
+
+ LIBC_NAMESPACE::printf_core::FormatSection expected;
+ expected.has_conv = true;
+
+ expected.raw_string = {str, 6};
+ expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf;
+ expected.bit_width = 32;
+ expected.conv_val_raw = arg1;
+ expected.conv_name = 'd';
+
+ ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
const char *str = "% -056.78jd";
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 186b37e2898af6..8d61dc3d5dd686 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -169,6 +169,40 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
EXPECT_EQ(written, 20);
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+ written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "7");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+ written =
+ LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "7");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+ written =
+ LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
// Min Width Tests.
written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks correct overall, I have a few comments but they should be fairly minor.
uintmax_t mask; | ||
if (bw == 0) { | ||
mask = 0; | ||
} else if (bw < sizeof(uintmax_t) * 8) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
replace 8 with CHAR_BIT
libc/src/stdio/printf_core/parser.h
Outdated
@@ -177,6 +177,8 @@ template <typename ArgProvider> class Parser { | |||
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index); | |||
break; | |||
case (LengthModifier::j): | |||
case (LengthModifier::w): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure that this is correct. We should probably have w
and wf
request an argument of the smallest type that will fit, but is at least the size of an int.
The reason we want to roughly match the int size is because these arguments may be packed together in memory and so we may get incorrect behavior if we request too many bits for a given argument. We can assume nothing's smaller than an int though because va_args automatically promotes to int if the argument is smaller.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh gotcha ! Yep will change that.
} else if (bw < sizeof(uintmax_t) * 8) { | ||
mask = (static_cast<uintmax_t>(1) << bw) - 1; | ||
} else { | ||
mask = UINTMAX_MAX; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you should add a test that checks this behavior
2ac5ae2
to
d026192
Compare
libc/test/src/stdio/sprintf_test.cpp
Outdated
LIBC_NAMESPACE::sprintf(format, "%%w%du", UINTMAX_WIDTH); | ||
LIBC_NAMESPACE::sprintf(uintmax, "%ju", UINTMAX_MAX); | ||
written = LIBC_NAMESPACE::sprintf(buff, format, UINTMAX_MAX); | ||
EXPECT_EQ(written, static_cast<int>(strlen(uintmax))); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you don't need to call strlen here, just use a variable to store the result from the sprintf call for %ju
and compare against that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh right ! Thx. Will delete the unnecessary call.
@@ -169,6 +170,52 @@ TEST(LlvmLibcSPrintfTest, IntConv) { | |||
EXPECT_EQ(written, 20); | |||
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min | |||
|
|||
written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you also need tests that check the behavior of incorrect bit-widths. You need at least one test each for %w0d
, %w999d
, and %w-1d
.
Are you still working on this? It's very close to done. |
Hey yes. Sorry for the delay. Almost done working on this. I am not sure about the max value that N can take in %wN and %wfN modifiers. The specs document says that N is implementation specific. Currently %w999 would successfully be parsed, it just that actual output would be capped at uintmax of the underlying system. |
Leaving uintmax_t as the max supported size is fine for now, in future I'd like to support uint128 but that will be in a followup patch. As for what to do when 999 is passed, that's up to you. The ideas I have are you could either mark the conversion as invalid (setting |
Got it. Thx. Will try clamping to uintmax_t for now. |
238768b
to
a507c2e
Compare
Currently bit widths (<= 0) result in a output of zero. Is this a good way to deal with this ? I think that that user would have a hard time telling what caused the output to be zero. Maybe in this case, it is better to just skip conversion and just output the entire conversion as is a string (%w-1d would display "%w-1d" ), as it is clearly an invalid conversion specification. The specifications state that N has to be greater than 0 too. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that sounds like a good design. I'd say check that the character after the w
or f
is a digit, similar to how the width argument is handled.
libc/test/src/stdio/sprintf_test.cpp
Outdated
@@ -12,6 +12,7 @@ | |||
#include "test/UnitTest/RoundingModeUtils.h" | |||
#include "test/UnitTest/Test.h" | |||
#include <inttypes.h> | |||
#include <src/string/strlen.h> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you can remove this include now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM after some minor changes.
libc/test/src/stdio/sprintf_test.cpp
Outdated
EXPECT_EQ(written, 7); | ||
ASSERT_STREQ(buff, "%wf-1d1"); | ||
|
||
written = LIBC_NAMESPACE::sprintf(buff, "%wf999d", 5807); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: This number should be at least 64 bits long, since this is testing that a width of 999
resolves to at least uintmax_t
.
libc/test/src/stdio/sprintf_test.cpp
Outdated
EXPECT_EQ(written, 1); | ||
ASSERT_STREQ(buff, "1"); | ||
|
||
written = LIBC_NAMESPACE::sprintf(buff, "%w64d", 9223372036854775807l); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this number should be marked long long
(ends with ll
) instead of just long
(ends with l
). Some platforms define long
to only be 32 bits, but long long
is always 64.
libc/test/src/stdio/sprintf_test.cpp
Outdated
EXPECT_EQ(written, 4); | ||
ASSERT_STREQ(buff, "%w0d"); | ||
|
||
written = LIBC_NAMESPACE::sprintf(buff, "%w999d", 9223372036854775807l); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
same here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for creating this change! I'll merge it tomorrow morning since I want to give the presubmits a chance to run.
Thank you for reviewing my first PR to LLVM ! |
@omprakaash Congratulations on having your first Pull Request (PR) merged into the LLVM Project! Your changes will be combined with recent changes from other authors, then tested Please check whether problems have been caused by your change specifically, as How to do this, and the rest of the post-merge process, is covered in detail here. If your change does cause a problem, it may be reverted, or you can revert it yourself. If you don't get any reports, no action is required from you. Your changes are working as expected, well done! |
In patch llvm#82461 the sprintf tests were made to use UINTMAX_WIDTH which isn't defined on all systems. This patch changes it to sizeof(uintmax_t)*CHAR_BIT which is more portable.
There was a minor issue with the tests, this patch will fix forward: #87092 |
In patch #82461 the sprintf tests were made to use UINTMAX_WIDTH which isn't defined on all systems. This patch changes it to sizeof(uintmax_t)*CHAR_BIT which is more portable.
Resolves #81685. This adds support for wN and wfN length modifiers in fprintf.