Skip to content

Commit 4f4752e

Browse files
[libc][NFC] implement printf parser
This patch adds the sequential mode implementation of the printf parser, as well as unit tests for it. In addition it adjusts the surrounding files to accommodate changes in the design found in the implementation process. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D123339
1 parent 72cd50b commit 4f4752e

File tree

12 files changed

+646
-109
lines changed

12 files changed

+646
-109
lines changed

libc/src/stdio/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
add_subdirectory(printf_core)
2+
13
add_entrypoint_object(
24
fopen
35
SRCS
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
2+
add_header_library(
3+
core_structs
4+
HDRS
5+
core_structs.h
6+
)
7+
8+
add_object_library(
9+
parser
10+
SRCS
11+
parser.cpp
12+
HDRS
13+
parser.h
14+
DEPENDS
15+
.core_structs
16+
libc.src.__support.arg_list
17+
libc.src.__support.ctype_utils
18+
libc.src.__support.str_to_integer
19+
libc.src.__support.CPP.bit
20+
21+
)

libc/src/stdio/printf_files/converter.h renamed to libc/src/stdio/printf_core/converter.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CONVERTER_H
10-
#define LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CONVERTER_H
9+
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_H
10+
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_H
1111

12-
#include "src/stdio/printf_files/core_structs.h"
13-
#include "src/stdio/printf_files/writer.h"
12+
#include "src/stdio/printf_core/core_structs.h"
13+
#include "src/stdio/printf_core/writer.h"
1414

1515
#include <stddef.h>
1616

@@ -32,4 +32,4 @@ class Converter {
3232
} // namespace printf_core
3333
} // namespace __llvm_libc
3434

35-
#endif // LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CONVERTER_H
35+
#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_H

libc/src/stdio/printf_files/core_structs.h renamed to libc/src/stdio/printf_core/core_structs.h

Lines changed: 18 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,42 +6,29 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CORE_STRUCTS_H
10-
#define LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CORE_STRUCTS_H
9+
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CORE_STRUCTS_H
10+
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CORE_STRUCTS_H
1111

1212
#include <inttypes.h>
1313
#include <stddef.h>
1414

1515
namespace __llvm_libc {
1616
namespace printf_core {
1717

18+
// These length modifiers match the length modifiers in the format string, which
19+
// is why they are formatted differently from the rest of the file.
1820
enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
19-
enum VariableType : uint8_t {
20-
// Types
2121

22-
Void = 0x00,
23-
Char = 0x01,
24-
// WChar = 0x02,
25-
// WInt = 0x03,
26-
Short = 0x04,
27-
Int = 0x05,
28-
Long = 0x06,
29-
LLong = 0x07,
30-
Intmax = 0x08,
31-
Size = 0x09,
32-
Ptrdiff = 0x0a,
33-
Double = 0x0b,
34-
LDouble = 0x0c,
22+
enum FormatFlags : uint8_t {
23+
LEFT_JUSTIFIED = 0x01, // -
24+
FORCE_SIGN = 0x02, // +
25+
SPACE_PREFIX = 0x04, // space
26+
ALTERNATE_FORM = 0x08, // #
27+
LEADING_ZEROES = 0x10, // 0
3528

36-
// Modifiers
37-
38-
Signed = 0x40,
39-
Pointer = 0x80,
40-
41-
// Masks
42-
43-
Type_Mask = 0x3f,
44-
Modifier_Mask = 0xc,
29+
// These flags come from the GNU extensions which aren't yet implemented.
30+
// group_decimals = 0x20, // '
31+
// locale_digits = 0x40, // I
4532
};
4633

4734
struct FormatSection {
@@ -51,14 +38,10 @@ struct FormatSection {
5138
size_t raw_len;
5239

5340
// Format Specifier Values
54-
bool left_justified;
55-
bool force_sign;
56-
bool space_prefix;
57-
bool alt_form;
58-
bool leading_zeroes;
59-
LengthModifier length_modifier;
60-
int min_width;
61-
int precision;
41+
FormatFlags flags = FormatFlags(0);
42+
LengthModifier length_modifier = LengthModifier::none;
43+
int min_width = 0;
44+
int precision = -1;
6245

6346
__uint128_t conv_val_raw; // Needs to be large enough to hold a long double.
6447
void *conv_val_ptr;
@@ -69,4 +52,4 @@ struct FormatSection {
6952
} // namespace printf_core
7053
} // namespace __llvm_libc
7154

72-
#endif // LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CORE_STRUCTS_H
55+
#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CORE_STRUCTS_H

libc/src/stdio/printf_core/parser.cpp

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
//===-- Format string parser implementation for printf ----------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "parser.h"
10+
11+
#include "src/__support/arg_list.h"
12+
13+
#include "src/__support/CPP/Bit.h"
14+
#include "src/__support/ctype_utils.h"
15+
#include "src/__support/str_to_integer.h"
16+
17+
namespace __llvm_libc {
18+
namespace printf_core {
19+
20+
// Because this macro is unconditionally defined here, the #ifndef branch below
// is never selected in this file; only the #else definition is active.
#define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
21+
22+
// GET_ARG_VAL_SIMPLEST(arg_type, index) reads one argument of type arg_type.
// When LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE is not defined it expands to
// get_arg_value<arg_type>(index). When it is defined, the second macro
// argument is discarded (so it is never evaluated) and the macro expands to
// get_next_arg_value<arg_type>().
#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
23+
#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
24+
#else
25+
#define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>()
26+
#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
27+
28+
// Parses the section of the format string that starts at cur_pos and returns
// it, leaving cur_pos on the first character after that section.
//
// A section is either:
//  - a raw section: a run of characters up to (not including) the next '%' or
//    the terminating '\0'; has_conv is false.
//  - a format section: '%', then flags, width, precision and length modifier,
//    then a conversion character; has_conv is true and the matching argument
//    is read into conv_val_raw or conv_val_ptr.
//
// If the character in the conversion position is not a recognised conversion,
// the section is reported as raw (has_conv == false) so the caller can handle
// the text verbatim. In every case raw_string/raw_len describe the exact span
// of the format string that was consumed.
FormatSection Parser::get_next_section() {
  FormatSection section;
  section.raw_string = str + cur_pos;
  size_t starting_pos = cur_pos;
  if (str[cur_pos] == '%') {
    // format section
    section.has_conv = true;

    ++cur_pos;
    // Only referenced when index mode is enabled; the sequential-mode macro
    // discards it.
    [[maybe_unused]] size_t conv_index = 0;

    section.flags = parse_flags(&cur_pos);

    // handle width
    section.min_width = 0;
    if (str[cur_pos] == '*') {
      ++cur_pos;

      section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
    } else if (internal::isdigit(str[cur_pos])) {
      char *int_end;
      section.min_width =
          internal::strtointeger<int>(str + cur_pos, &int_end, 10);
      cur_pos = int_end - str;
    }
    // A negative width is stored as its absolute value plus the '-' flag.
    if (section.min_width < 0) {
      section.min_width = -section.min_width;
      section.flags =
          static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED);
    }

    // handle precision
    section.precision = -1; // negative precisions are ignored.
    if (str[cur_pos] == '.') {
      ++cur_pos;
      section.precision = 0; // if there's a . but no specified precision, the
                             // precision is implicitly 0.
      if (str[cur_pos] == '*') {
        ++cur_pos;

        section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));

      } else if (internal::isdigit(str[cur_pos])) {
        char *int_end;
        section.precision =
            internal::strtointeger<int>(str + cur_pos, &int_end, 10);
        cur_pos = int_end - str;
      }
    }

    LengthModifier lm = parse_length_modifier(&cur_pos);

    section.length_modifier = lm;
    section.conv_name = str[cur_pos];
    switch (str[cur_pos]) {
    case ('%'):
      break;
    case ('c'):
      section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
      break;
    case ('d'):
    case ('i'):
    case ('o'):
    case ('x'):
    case ('X'):
    case ('u'):
      // The length modifier selects the type that is read from the argument
      // list. hh and h are read as int.
      switch (lm) {
      case (LengthModifier::hh):
      case (LengthModifier::h):
      case (LengthModifier::none):
        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
        break;
      case (LengthModifier::l):
        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index);
        break;
      case (LengthModifier::ll):
      case (LengthModifier::L): // This isn't in the standard, but is in other
                                // libc implementations.
        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index);
        break;
      case (LengthModifier::j):
        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index);
        break;
      case (LengthModifier::z):
        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index);
        break;
      case (LengthModifier::t):
        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index);
        break;
      }
      break;
    case ('f'):
    case ('F'):
    case ('e'):
    case ('E'):
    case ('a'):
    case ('A'):
    case ('g'):
    case ('G'):
      // Floating point values are stored as their raw bit pattern.
      if (lm != LengthModifier::L)
        section.conv_val_raw =
            bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index));
      else
        section.conv_val_raw = bit_cast<__uint128_t>(
            GET_ARG_VAL_SIMPLEST(long double, conv_index));
      break;
    case ('n'):
    case ('p'):
    case ('s'):
      section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
      break;
    default:
      // if the conversion is undefined, change this to a raw section.
      section.has_conv = false;
      break;
    }
    // If the format string ends in the middle of a conversion (for example
    // "%" or "%5"), cur_pos is sitting on the terminating '\0'. Advancing past
    // it would make the next call read beyond the end of the string, so only
    // consume the conversion character when there actually is one.
    if (str[cur_pos] != '\0')
      ++cur_pos;
  } else {
    // raw section
    section.has_conv = false;
    while (str[cur_pos] != '%' && str[cur_pos] != '\0')
      ++cur_pos;
  }
  section.raw_len = cur_pos - starting_pos;
  return section;
}
154+
155+
// Consumes every consecutive flag character ('-', '+', ' ', '#', '0') starting
// at str[*local_pos] and returns the union of the corresponding FormatFlags
// bits. On return *local_pos refers to the first character that is not a flag;
// if there are no flags, *local_pos is unchanged and the result is 0.
FormatFlags Parser::parse_flags(size_t *local_pos) {
  FormatFlags result = FormatFlags(0);
  for (;; ++*local_pos) {
    FormatFlags matched;
    switch (str[*local_pos]) {
    case '-':
      matched = FormatFlags::LEFT_JUSTIFIED;
      break;
    case '+':
      matched = FormatFlags::FORCE_SIGN;
      break;
    case ' ':
      matched = FormatFlags::SPACE_PREFIX;
      break;
    case '#':
      matched = FormatFlags::ALTERNATE_FORM;
      break;
    case '0':
      matched = FormatFlags::LEADING_ZEROES;
      break;
    default:
      // First non-flag character: stop without consuming it.
      return result;
    }
    result = static_cast<FormatFlags>(result | matched);
  }
}
183+
184+
// Parses an optional length modifier ("hh", "h", "l", "ll", "L", "j", "z" or
// "t") starting at str[*local_pos]. If one is present, *local_pos is advanced
// past it and the matching LengthModifier is returned; otherwise *local_pos is
// left untouched and LengthModifier::none is returned.
//
// All reads go through *local_pos (never the cur_pos member directly) so the
// function behaves correctly for whatever position the caller passes in.
LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
  switch (str[*local_pos]) {
  case ('l'):
    // Look one character ahead to tell "ll" from "l". If 'l' is the last
    // character, the lookahead reads the terminating '\0'.
    if (str[*local_pos + 1] == 'l') {
      *local_pos += 2;
      return LengthModifier::ll;
    } else {
      ++*local_pos;
      return LengthModifier::l;
    }
  case ('h'):
    // Same lookahead as for 'l', relative to *local_pos.
    if (str[*local_pos + 1] == 'h') {
      *local_pos += 2;
      return LengthModifier::hh;
    } else {
      ++*local_pos;
      return LengthModifier::h;
    }
  case ('L'):
    ++*local_pos;
    return LengthModifier::L;
  case ('j'):
    ++*local_pos;
    return LengthModifier::j;
  case ('z'):
    ++*local_pos;
    return LengthModifier::z;
  case ('t'):
    ++*local_pos;
    return LengthModifier::t;
  default:
    return LengthModifier::none;
  }
}
218+
219+
} // namespace printf_core
220+
} // namespace __llvm_libc

0 commit comments

Comments
 (0)