Skip to content

Add bit width length modifier to printf #82461

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions libc/docs/dev/printf_behavior.rst
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,10 @@ If a number passed as a min width or precision value is out of range for an int,
then it will be treated as the largest or smallest value in the int range
(e.g. "%-999999999999.999999999999s" is the same as "%-2147483648.2147483647s").

If a number passed as a bit width is less than or equal to zero, the conversion
is considered invalid. If the provided bit width is larger than the width of
uintmax_t, it will be clamped to the width of uintmax_t.

----------
Conversion
----------
Expand Down
16 changes: 15 additions & 1 deletion libc/src/stdio/printf_core/converter_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
namespace LIBC_NAMESPACE {
namespace printf_core {

LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num,
LengthSpec length_spec) {
auto [lm, bw] = length_spec;
switch (lm) {
case LengthModifier::none:
return num & cpp::numeric_limits<unsigned int>::max();
Expand All @@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
return num & cpp::numeric_limits<uintptr_t>::max();
case LengthModifier::j:
return num; // j is intmax, so no mask is necessary.
case LengthModifier::w:
case LengthModifier::wf: {
uintmax_t mask;
if (bw == 0) {
mask = 0;
} else if (bw < sizeof(uintmax_t) * CHAR_BIT) {
mask = (static_cast<uintmax_t>(1) << bw) - 1;
} else {
mask = UINTMAX_MAX;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you should add a test that checks this behavior

}
return num & mask;
}
}
__builtin_unreachable();
}
Expand Down
9 changes: 8 additions & 1 deletion libc/src/stdio/printf_core/core_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ namespace printf_core {

// These length modifiers match the length modifiers in the format string, which
// is why they are formatted differently from the rest of the file.
enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none };

struct LengthSpec {
LengthModifier lm;
size_t bit_width;
};

enum FormatFlags : uint8_t {
LEFT_JUSTIFIED = 0x01, // -
Expand All @@ -42,6 +47,7 @@ struct FormatSection {
// Format Specifier Values
FormatFlags flags = FormatFlags(0);
LengthModifier length_modifier = LengthModifier::none;
size_t bit_width = 0;
int min_width = 0;
int precision = -1;

Expand All @@ -64,6 +70,7 @@ struct FormatSection {
if (!((static_cast<uint8_t>(flags) ==
static_cast<uint8_t>(other.flags)) &&
(min_width == other.min_width) && (precision == other.precision) &&
(bit_width == other.bit_width) &&
(length_modifier == other.length_modifier) &&
(conv_name == other.conv_name)))
return false;
Expand Down
5 changes: 2 additions & 3 deletions libc/src/stdio/printf_core/int_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
bool is_negative = false;
FormatFlags flags = to_conv.flags;

const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';

// If the conversion is signed, then handle negative values.
Expand All @@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
}

num = apply_length_modifier(num, to_conv.length_modifier);

num =
apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
cpp::array<char, details::num_buf_size()> buf;
auto str = details::num_to_strview(num, buf, to_conv.conv_name);
if (!str)
Expand Down
69 changes: 56 additions & 13 deletions libc/src/stdio/printf_core/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,10 @@ template <typename ArgProvider> class Parser {
}
}

LengthModifier lm = parse_length_modifier(&cur_pos);

auto [lm, bw] = parse_length_modifier(&cur_pos);
section.length_modifier = lm;
section.conv_name = str[cur_pos];
section.bit_width = bw;
switch (str[cur_pos]) {
case ('%'):
// Regardless of options, a % conversion is always safe. The standard
Expand Down Expand Up @@ -188,6 +188,21 @@ template <typename ArgProvider> class Parser {

WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index);
break;

case (LengthModifier::w):
case (LengthModifier::wf):
if (bw == 0) {
section.has_conv = false;
} else if (bw <= INT_WIDTH) {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
} else if (bw <= LONG_WIDTH) {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
} else if (bw <= LLONG_WIDTH) {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
} else {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
}
break;
}
break;
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
Expand Down Expand Up @@ -273,38 +288,54 @@ template <typename ArgProvider> class Parser {
// assumes that str[*local_pos] is inside a format specifier. It returns a
// LengthModifier with the length modifier it found. It will advance local_pos
// after the format specifier if one is found.
LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
switch (str[*local_pos]) {
case ('l'):
if (str[*local_pos + 1] == 'l') {
*local_pos += 2;
return LengthModifier::ll;
return {LengthModifier::ll, 0};
} else {
++*local_pos;
return LengthModifier::l;
return {LengthModifier::l, 0};
}
case ('w'): {
LengthModifier lm;
if (str[*local_pos + 1] == 'f') {
*local_pos += 2;
lm = LengthModifier::wf;
} else {
++*local_pos;
lm = LengthModifier::w;
}
if (internal::isdigit(str[*local_pos])) {
const auto result = internal::strtointeger<int>(str + *local_pos, 10);
*local_pos += result.parsed_len;
return {lm, static_cast<size_t>(cpp::max(0, result.value))};
}
return {lm, 0};
}
case ('h'):
if (str[*local_pos + 1] == 'h') {
*local_pos += 2;
return LengthModifier::hh;
return {LengthModifier::hh, 0};
} else {
++*local_pos;
return LengthModifier::h;
return {LengthModifier::h, 0};
}
case ('L'):
++*local_pos;
return LengthModifier::L;
return {LengthModifier::L, 0};
case ('j'):
++*local_pos;
return LengthModifier::j;
return {LengthModifier::j, 0};
case ('z'):
++*local_pos;
return LengthModifier::z;
return {LengthModifier::z, 0};
case ('t'):
++*local_pos;
return LengthModifier::t;
return {LengthModifier::t, 0};
default:
return LengthModifier::none;
return {LengthModifier::none, 0};
}
}

Expand Down Expand Up @@ -460,7 +491,7 @@ template <typename ArgProvider> class Parser {
}
}

LengthModifier lm = parse_length_modifier(&local_pos);
auto [lm, bw] = parse_length_modifier(&local_pos);

// if we don't have an index for this conversion, then its position is
// unknown and all this information is irrelevant. The rest of this
Expand Down Expand Up @@ -511,6 +542,18 @@ template <typename ArgProvider> class Parser {
case (LengthModifier::t):
conv_size = type_desc_from_type<ptrdiff_t>();
break;
case (LengthModifier::w):
case (LengthModifier::wf):
if (bw <= INT_WIDTH) {
conv_size = type_desc_from_type<int>();
} else if (bw <= LONG_WIDTH) {
conv_size = type_desc_from_type<long>();
} else if (bw <= LLONG_WIDTH) {
conv_size = type_desc_from_type<long long>();
} else {
conv_size = type_desc_from_type<intmax_t>();
}
break;
}
break;
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
Expand Down
2 changes: 2 additions & 0 deletions libc/src/stdio/printf_core/write_int_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer,
*reinterpret_cast<ptrdiff_t *>(to_conv.conv_val_ptr) = written;
break;
case LengthModifier::j:
case LengthModifier::w:
case LengthModifier::wf:
*reinterpret_cast<uintmax_t *>(to_conv.conv_val_ptr) = written;
break;
}
Expand Down
6 changes: 6 additions & 0 deletions libc/test/UnitTest/PrintfMatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ namespace {
case (LengthModifier::lm): \
tlog << #lm; \
break
#define CASE_LM_BIT_WIDTH(lm, bw) \
case (LengthModifier::lm): \
tlog << #lm << "\n\tbit width: :" << bw; \
break

static void display(FormatSection form) {
tlog << "Raw String (len " << form.raw_string.size() << "): \"";
Expand Down Expand Up @@ -67,6 +71,8 @@ static void display(FormatSection form) {
CASE_LM(z);
CASE_LM(t);
CASE_LM(L);
CASE_LM_BIT_WIDTH(w, form.bit_width);
CASE_LM_BIT_WIDTH(wf, form.bit_width);
}
tlog << "\n";
tlog << "\tconversion name: " << form.conv_name << "\n";
Expand Down
36 changes: 36 additions & 0 deletions libc/test/src/stdio/printf_core/parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) {
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
const char *str = "%w32d";
long long arg1 = 12345;
evaluate(format_arr, str, arg1);

LIBC_NAMESPACE::printf_core::FormatSection expected;
expected.has_conv = true;

expected.raw_string = {str, 5};
expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w;
expected.bit_width = 32;
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) {
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
const char *str = "%wf32d";
long long arg1 = 12345;
evaluate(format_arr, str, arg1);

LIBC_NAMESPACE::printf_core::FormatSection expected;
expected.has_conv = true;

expected.raw_string = {str, 6};
expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf;
expected.bit_width = 32;
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
const char *str = "% -056.78jd";
Expand Down
86 changes: 86 additions & 0 deletions libc/test/src/stdio/sprintf_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,92 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
EXPECT_EQ(written, 20);
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min

written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you also need tests that check the behavior of incorrect bit-widths. You need at least one test each for %w0d, %w999d, and %w-1d.

EXPECT_EQ(written, 1);
ASSERT_STREQ(buff, "7");

written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1);
EXPECT_EQ(written, 1);
ASSERT_STREQ(buff, "1");

written = LIBC_NAMESPACE::sprintf(buff, "%w64d", 9223372036854775807ll);
EXPECT_EQ(written, 19);
ASSERT_STREQ(buff, "9223372036854775807");

written = LIBC_NAMESPACE::sprintf(buff, "%w-1d", 5807);
EXPECT_EQ(written, 5);
ASSERT_STREQ(buff, "%w-1d");

written = LIBC_NAMESPACE::sprintf(buff, "%w0d", 5807);
EXPECT_EQ(written, 4);
ASSERT_STREQ(buff, "%w0d");

written = LIBC_NAMESPACE::sprintf(buff, "%w999d", 9223372036854775807ll);
EXPECT_EQ(written, 19);
ASSERT_STREQ(buff, "9223372036854775807");

written = LIBC_NAMESPACE::sprintf(buff, "%winvalid%w1d", 5807, 5807);
EXPECT_EQ(written, 10);
ASSERT_STREQ(buff, "%winvalid1");

written = LIBC_NAMESPACE::sprintf(buff, "%w-1d%w1d", 5807, 5807);
EXPECT_EQ(written, 6);
ASSERT_STREQ(buff, "%w-1d1");

char format[64];
char uintmax[128];
LIBC_NAMESPACE::sprintf(format, "%%w%du", UINTMAX_WIDTH);
const int uintmax_len = LIBC_NAMESPACE::sprintf(uintmax, "%ju", UINTMAX_MAX);
written = LIBC_NAMESPACE::sprintf(buff, format, UINTMAX_MAX);
EXPECT_EQ(written, uintmax_len);
ASSERT_STREQ(buff, uintmax);

written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull);
EXPECT_EQ(written, 20);
ASSERT_STREQ(buff, "18446744073709551615"); // ull max

written =
LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll);
EXPECT_EQ(written, 20);
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min

written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807);
EXPECT_EQ(written, 1);
ASSERT_STREQ(buff, "7");

written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1);
EXPECT_EQ(written, 1);
ASSERT_STREQ(buff, "1");

written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull);
EXPECT_EQ(written, 20);
ASSERT_STREQ(buff, "18446744073709551615"); // ull max

written =
LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll);
EXPECT_EQ(written, 20);
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min

written = LIBC_NAMESPACE::sprintf(buff, "%wf0d", 5807);
EXPECT_EQ(written, 5);
ASSERT_STREQ(buff, "%wf0d");

written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d", 5807);
EXPECT_EQ(written, 6);
ASSERT_STREQ(buff, "%wf-1d");

written = LIBC_NAMESPACE::sprintf(buff, "%wfinvalid%wf1d", 5807, 5807);
EXPECT_EQ(written, 11);
ASSERT_STREQ(buff, "%wfinvalid1");

written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d%wf1d", 5807, 5807);
EXPECT_EQ(written, 7);
ASSERT_STREQ(buff, "%wf-1d1");

written = LIBC_NAMESPACE::sprintf(buff, "%wf999d", 9223372036854775807ll);
EXPECT_EQ(written, 19);
ASSERT_STREQ(buff, "9223372036854775807");

// Min Width Tests.

written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);
Expand Down