Skip to content

Commit 392e448

Browse files
committed
Vendor string parsing functions from syn crate
String parsing is very commonly used in procedural macros, so vendor in relevant bits from syn and share them among proc_macro crates. To support this rustc_procmacro_library is added to Makefile as it should be built for host in the event of cross-compilation. Signed-off-by: Gary Guo <[email protected]>
1 parent 124eb66 commit 392e448

File tree

3 files changed

+273
-3
lines changed

3 files changed

+273
-3
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
*.o.*
3939
*.patch
4040
*.rmeta
41+
*.rlib
4142
*.s
4243
*.so
4344
*.so.dbg

rust/Makefile

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,15 +104,30 @@ $(objtree)/rust/exports_kernel_generated.h: $(objtree)/rust/kernel.o FORCE
104104
# avoid the https://github.com/rust-lang/rust/issues/82320 rustc crash.
105105
quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@
106106
cmd_rustc_procmacro = \
107-
$(RUSTC_OR_CLIPPY) $(rustc_flags) \
107+
$(RUSTC_OR_CLIPPY) $(rustc_flags) $(rustc_target_flags) \
108108
--emit=dep-info,link --extern proc_macro \
109109
-Cpanic=unwind -Cforce-unwind-tables=y \
110-
--crate-type proc-macro --out-dir $(objtree)/rust/ \
110+
--crate-type proc-macro --out-dir $(objtree)/rust/ -L $(objtree)/rust/ \
111111
--crate-name $(patsubst lib%.so,%,$(notdir $@)) $<; \
112112
mv $(objtree)/rust/$(patsubst lib%.so,%,$(notdir $@)).d $(depfile); \
113113
sed -i '/^\#/d' $(depfile)
114114

115-
$(objtree)/rust/libmodule.so: $(srctree)/rust/module.rs FORCE
115+
quiet_cmd_rustc_procmacro_library = $(RUSTC_OR_CLIPPY_QUIET) P $@
116+
cmd_rustc_procmacro_library = \
117+
$(RUSTC_OR_CLIPPY) $(rustc_flags) $(rustc_target_flags) \
118+
--emit=dep-info,link \
119+
-Cpanic=unwind -Cforce-unwind-tables=y \
120+
--crate-type rlib --out-dir $(objtree)/rust/ -L $(objtree)/rust/ \
121+
--crate-name $(patsubst lib%.rlib,%,$(notdir $@)) $<; \
122+
mv $(objtree)/rust/$(patsubst lib%.rlib,%,$(notdir $@)).d $(depfile); \
123+
sed -i '/^\#/d' $(depfile)
124+
125+
$(objtree)/rust/libparse.rlib: $(srctree)/rust/parse.rs FORCE
126+
$(call if_changed_dep,rustc_procmacro_library)
127+
128+
$(objtree)/rust/libmodule.so: private rustc_target_flags = --extern parse
129+
$(objtree)/rust/libmodule.so: $(srctree)/rust/module.rs \
130+
$(objtree)/rust/libparse.rlib FORCE
116131
$(call if_changed_dep,rustc_procmacro)
117132

118133
quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L $@

rust/parse.rs

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
// SPDX-License-Identifier: MIT OR Apache-2.0
2+
3+
//! Shared parsing functions for use in procedural macros. These functions are
4+
//! from [syn](https://github.com/dtolnay/syn).
5+
6+
use std::char;
7+
use std::ops::{Index, RangeFrom};
8+
9+
/// Returns the byte stored at offset `idx` of `s`.
///
/// Reading at or past the end of the buffer does not panic; it yields the
/// default value `b'\0'` instead.
pub fn byte<S: AsRef<[u8]> + ?Sized>(s: &S, idx: usize) -> u8 {
    s.as_ref().get(idx).copied().unwrap_or(0)
}
19+
20+
/// Returns the first character of `s`, or `'\0'` when `s` is empty.
fn next_chr(s: &str) -> char {
    match s.chars().next() {
        Some(first) => first,
        None => '\0',
    }
}
23+
24+
// Returns (content, suffix).
25+
pub fn parse_lit_str(s: &str) -> (Box<str>, Box<str>) {
26+
match byte(s, 0) {
27+
b'"' => parse_lit_str_cooked(s),
28+
b'r' => parse_lit_str_raw(s),
29+
_ => unreachable!(),
30+
}
31+
}
32+
33+
// Clippy false positive
34+
// https://github.com/rust-lang-nursery/rust-clippy/issues/2329
35+
#[allow(clippy::needless_continue)]
36+
fn parse_lit_str_cooked(mut s: &str) -> (Box<str>, Box<str>) {
37+
assert_eq!(byte(s, 0), b'"');
38+
s = &s[1..];
39+
40+
let mut content = String::new();
41+
'outer: loop {
42+
let ch = match byte(s, 0) {
43+
b'"' => break,
44+
b'\\' => {
45+
let b = byte(s, 1);
46+
s = &s[2..];
47+
match b {
48+
b'x' => {
49+
let (byte, rest) = backslash_x(s);
50+
s = rest;
51+
assert!(byte <= 0x80, "Invalid \\x byte in string literal");
52+
char::from_u32(u32::from(byte)).unwrap()
53+
}
54+
b'u' => {
55+
let (chr, rest) = backslash_u(s);
56+
s = rest;
57+
chr
58+
}
59+
b'n' => '\n',
60+
b'r' => '\r',
61+
b't' => '\t',
62+
b'\\' => '\\',
63+
b'0' => '\0',
64+
b'\'' => '\'',
65+
b'"' => '"',
66+
b'\r' | b'\n' => loop {
67+
let ch = next_chr(s);
68+
if ch.is_whitespace() {
69+
s = &s[ch.len_utf8()..];
70+
} else {
71+
continue 'outer;
72+
}
73+
},
74+
b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
75+
}
76+
}
77+
b'\r' => {
78+
assert_eq!(byte(s, 1), b'\n', "Bare CR not allowed in string");
79+
s = &s[2..];
80+
'\n'
81+
}
82+
_ => {
83+
let ch = next_chr(s);
84+
s = &s[ch.len_utf8()..];
85+
ch
86+
}
87+
};
88+
content.push(ch);
89+
}
90+
91+
assert!(s.starts_with('"'));
92+
let content = content.into_boxed_str();
93+
let suffix = s[1..].to_owned().into_boxed_str();
94+
(content, suffix)
95+
}
96+
97+
fn parse_lit_str_raw(mut s: &str) -> (Box<str>, Box<str>) {
98+
assert_eq!(byte(s, 0), b'r');
99+
s = &s[1..];
100+
101+
let mut pounds = 0;
102+
while byte(s, pounds) == b'#' {
103+
pounds += 1;
104+
}
105+
assert_eq!(byte(s, pounds), b'"');
106+
let close = s.rfind('"').unwrap();
107+
for end in s[close + 1..close + 1 + pounds].bytes() {
108+
assert_eq!(end, b'#');
109+
}
110+
111+
let content = s[pounds + 1..close].to_owned().into_boxed_str();
112+
let suffix = s[close + 1 + pounds..].to_owned().into_boxed_str();
113+
(content, suffix)
114+
}
115+
116+
// Returns (content, suffix).
117+
pub fn parse_lit_byte_str(s: &str) -> (Vec<u8>, Box<str>) {
118+
assert_eq!(byte(s, 0), b'b');
119+
match byte(s, 1) {
120+
b'"' => parse_lit_byte_str_cooked(s),
121+
b'r' => parse_lit_byte_str_raw(s),
122+
_ => unreachable!(),
123+
}
124+
}
125+
126+
// Clippy false positive
127+
// https://github.com/rust-lang-nursery/rust-clippy/issues/2329
128+
#[allow(clippy::needless_continue)]
129+
fn parse_lit_byte_str_cooked(mut s: &str) -> (Vec<u8>, Box<str>) {
130+
assert_eq!(byte(s, 0), b'b');
131+
assert_eq!(byte(s, 1), b'"');
132+
s = &s[2..];
133+
134+
// We're going to want to have slices which don't respect codepoint boundaries.
135+
let mut v = s.as_bytes();
136+
137+
let mut out = Vec::new();
138+
'outer: loop {
139+
let byte = match byte(v, 0) {
140+
b'"' => break,
141+
b'\\' => {
142+
let b = byte(v, 1);
143+
v = &v[2..];
144+
match b {
145+
b'x' => {
146+
let (b, rest) = backslash_x(v);
147+
v = rest;
148+
b
149+
}
150+
b'n' => b'\n',
151+
b'r' => b'\r',
152+
b't' => b'\t',
153+
b'\\' => b'\\',
154+
b'0' => b'\0',
155+
b'\'' => b'\'',
156+
b'"' => b'"',
157+
b'\r' | b'\n' => loop {
158+
let byte = byte(v, 0);
159+
let ch = char::from_u32(u32::from(byte)).unwrap();
160+
if ch.is_whitespace() {
161+
v = &v[1..];
162+
} else {
163+
continue 'outer;
164+
}
165+
},
166+
b => panic!("unexpected byte {:?} after \\ character in byte literal", b),
167+
}
168+
}
169+
b'\r' => {
170+
assert_eq!(byte(v, 1), b'\n', "Bare CR not allowed in string");
171+
v = &v[2..];
172+
b'\n'
173+
}
174+
b => {
175+
v = &v[1..];
176+
b
177+
}
178+
};
179+
out.push(byte);
180+
}
181+
182+
assert_eq!(byte(v, 0), b'"');
183+
let suffix = s[s.len() - v.len() + 1..].to_owned().into_boxed_str();
184+
(out, suffix)
185+
}
186+
187+
fn parse_lit_byte_str_raw(s: &str) -> (Vec<u8>, Box<str>) {
188+
assert_eq!(byte(s, 0), b'b');
189+
let (value, suffix) = parse_lit_str_raw(&s[1..]);
190+
(String::from(value).into_bytes(), suffix)
191+
}
192+
193+
fn backslash_x<S>(s: &S) -> (u8, &S)
194+
where
195+
S: Index<RangeFrom<usize>, Output = S> + AsRef<[u8]> + ?Sized,
196+
{
197+
let mut ch = 0;
198+
let b0 = byte(s, 0);
199+
let b1 = byte(s, 1);
200+
ch += 0x10
201+
* match b0 {
202+
b'0'..=b'9' => b0 - b'0',
203+
b'a'..=b'f' => 10 + (b0 - b'a'),
204+
b'A'..=b'F' => 10 + (b0 - b'A'),
205+
_ => panic!("unexpected non-hex character after \\x"),
206+
};
207+
ch += match b1 {
208+
b'0'..=b'9' => b1 - b'0',
209+
b'a'..=b'f' => 10 + (b1 - b'a'),
210+
b'A'..=b'F' => 10 + (b1 - b'A'),
211+
_ => panic!("unexpected non-hex character after \\x"),
212+
};
213+
(ch, &s[2..])
214+
}
215+
216+
fn backslash_u(mut s: &str) -> (char, &str) {
217+
if byte(s, 0) != b'{' {
218+
panic!("{}", "expected { after \\u");
219+
}
220+
s = &s[1..];
221+
222+
let mut ch = 0;
223+
let mut digits = 0;
224+
loop {
225+
let b = byte(s, 0);
226+
let digit = match b {
227+
b'0'..=b'9' => b - b'0',
228+
b'a'..=b'f' => 10 + b - b'a',
229+
b'A'..=b'F' => 10 + b - b'A',
230+
b'_' if digits > 0 => {
231+
s = &s[1..];
232+
continue;
233+
}
234+
b'}' if digits == 0 => panic!("invalid empty unicode escape"),
235+
b'}' => break,
236+
_ => panic!("unexpected non-hex character after \\u"),
237+
};
238+
if digits == 6 {
239+
panic!("overlong unicode escape (must have at most 6 hex digits)");
240+
}
241+
ch *= 0x10;
242+
ch += u32::from(digit);
243+
digits += 1;
244+
s = &s[1..];
245+
}
246+
assert!(byte(s, 0) == b'}');
247+
s = &s[1..];
248+
249+
if let Some(ch) = char::from_u32(ch) {
250+
(ch, s)
251+
} else {
252+
panic!("character code {:x} is not a valid unicode character", ch);
253+
}
254+
}

0 commit comments

Comments
 (0)