Skip to content

Commit 7a52f83

Browse files
bors[bot] and Veykril authored
Merge #11294
11294: internal: Move format specifier lexing from syntax to ide_db r=Veykril a=Veykril

bors r+

Co-authored-by: Lukas Wirth <[email protected]>
2 parents 4c34909 + 91279db commit 7a52f83

File tree

5 files changed

+301
-315
lines changed

5 files changed

+301
-315
lines changed

crates/ide/src/syntax_highlighting.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use hir::{InFile, Name, Semantics};
1616
use ide_db::RootDatabase;
1717
use rustc_hash::FxHashMap;
1818
use syntax::{
19-
ast::{self, HasFormatSpecifier},
19+
ast::{self, IsString},
2020
AstNode, AstToken, NodeOrToken,
2121
SyntaxKind::*,
2222
SyntaxNode, TextRange, WalkEvent, T,
@@ -336,17 +336,19 @@ fn traverse(
336336
}
337337
highlight_format_string(hl, &string, &expanded_string, range);
338338
// Highlight escape sequences
339-
if let Some(char_ranges) = string.char_ranges() {
340-
for (piece_range, _) in char_ranges.iter().filter(|(_, char)| char.is_ok()) {
341-
if string.text()[piece_range.start().into()..].starts_with('\\') {
342-
hl.add(HlRange {
343-
range: piece_range + range.start(),
344-
highlight: HlTag::EscapeSequence.into(),
345-
binding_hash: None,
346-
});
347-
}
339+
string.escaped_char_ranges(&mut |piece_range, char| {
340+
if char.is_err() {
341+
return;
348342
}
349-
}
343+
344+
if string.text()[piece_range.start().into()..].starts_with('\\') {
345+
hl.add(HlRange {
346+
range: piece_range + range.start(),
347+
highlight: HlTag::EscapeSequence.into(),
348+
binding_hash: None,
349+
});
350+
}
351+
});
350352
}
351353
}
352354

crates/ide/src/syntax_highlighting/format.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
//! Syntax highlighting for format macro strings.
2-
use ide_db::{helpers::format_string::is_format_string, SymbolKind};
3-
use syntax::{
4-
ast::{self, FormatSpecifier, HasFormatSpecifier},
5-
TextRange,
2+
use ide_db::{
3+
helpers::format_string::{is_format_string, lex_format_specifiers, FormatSpecifier},
4+
SymbolKind,
65
};
6+
use syntax::{ast, TextRange};
77

88
use crate::{syntax_highlighting::highlights::Highlights, HlRange, HlTag};
99

@@ -17,7 +17,7 @@ pub(super) fn highlight_format_string(
1717
return;
1818
}
1919

20-
string.lex_format_specifier(|piece_range, kind| {
20+
lex_format_specifiers(string, &mut |piece_range, kind| {
2121
if let Some(highlight) = highlight_format_specifier(kind) {
2222
stack.add(HlRange {
2323
range: piece_range + range.start(),

crates/ide_db/src/helpers/format_string.rs

Lines changed: 263 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
//! Tools to work with format string literals for the `format_args!` family of macros.
2-
use syntax::{ast, AstNode, AstToken};
2+
use syntax::{
3+
ast::{self, IsString},
4+
AstNode, AstToken, TextRange,
5+
};
36

47
pub fn is_format_string(string: &ast::String) -> bool {
58
// Check if `string` is a format string argument of a macro invocation.
@@ -10,7 +13,7 @@ pub fn is_format_string(string: &ast::String) -> bool {
1013
//
1114
// This setup lets us correctly highlight the components of `concat!("{}", "bla")` format
1215
// strings. It still fails for `concat!("{", "}")`, but that is rare.
13-
16+
format!("{string} {bar}", bar = string);
1417
(|| {
1518
let macro_call = string.syntax().ancestors().find_map(ast::MacroCall::cast)?;
1619
let name = macro_call.path()?.segment()?.name_ref()?;
@@ -29,3 +32,261 @@ pub fn is_format_string(string: &ast::String) -> bool {
2932
})()
3033
.is_some()
3134
}
35+
36+
/// The kind of a single lexed piece of a `{…}` format placeholder, as reported
/// by `lex_format_specifiers`.
///
/// All variants are plain tags without payload, so the type is `Copy` and can
/// be compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FormatSpecifier {
    /// The opening `{` of a placeholder.
    Open,
    /// The closing `}` of a placeholder.
    Close,
    /// A run of ASCII digits (argument position, width or precision).
    Integer,
    /// An identifier: starts with `_` or an alphabetic character, continues
    /// with `_`, ASCII digits or alphabetic characters.
    Identifier,
    /// The `:` that follows the (optional) argument.
    Colon,
    /// The fill character, i.e. the character directly preceding an alignment
    /// character.
    Fill,
    /// One of the alignment characters `<`, `^` or `>`.
    Align,
    /// A `+` or `-` sign flag.
    Sign,
    /// The `#` flag.
    NumberSign,
    /// The `0` flag (a leading `0` that is not immediately followed by `$`).
    Zero,
    /// The `$` following a width or precision argument.
    DollarSign,
    /// The `.` introducing the precision.
    Dot,
    /// The `*` precision.
    Asterisk,
    /// The `?` of a debug format type.
    QuestionMark,
}
53+
54+
pub fn lex_format_specifiers(
55+
string: &ast::String,
56+
mut callback: &mut dyn FnMut(TextRange, FormatSpecifier),
57+
) {
58+
let mut char_ranges = Vec::new();
59+
string.escaped_char_ranges(&mut |range, res| char_ranges.push((range, res)));
60+
let mut chars = char_ranges
61+
.iter()
62+
.filter_map(|(range, res)| Some((*range, *res.as_ref().ok()?)))
63+
.peekable();
64+
65+
while let Some((range, first_char)) = chars.next() {
66+
if let '{' = first_char {
67+
// Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
68+
if let Some((_, '{')) = chars.peek() {
69+
// Escaped format specifier, `{{`
70+
chars.next();
71+
continue;
72+
}
73+
74+
callback(range, FormatSpecifier::Open);
75+
76+
// check for integer/identifier
77+
let (_, int_char) = chars.peek().copied().unwrap_or_default();
78+
match int_char {
79+
// integer
80+
'0'..='9' => read_integer(&mut chars, &mut callback),
81+
// identifier
82+
c if c == '_' || c.is_alphabetic() => read_identifier(&mut chars, &mut callback),
83+
_ => {}
84+
}
85+
86+
if let Some((_, ':')) = chars.peek() {
87+
skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
88+
89+
// check for fill/align
90+
let mut cloned = chars.clone().take(2);
91+
let (_, first) = cloned.next().unwrap_or_default();
92+
let (_, second) = cloned.next().unwrap_or_default();
93+
match second {
94+
'<' | '^' | '>' => {
95+
// alignment specifier, first char specifies fillment
96+
skip_char_and_emit(&mut chars, FormatSpecifier::Fill, &mut callback);
97+
skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback);
98+
}
99+
_ => {
100+
if let '<' | '^' | '>' = first {
101+
skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback);
102+
}
103+
}
104+
}
105+
106+
// check for sign
107+
match chars.peek().copied().unwrap_or_default().1 {
108+
'+' | '-' => {
109+
skip_char_and_emit(&mut chars, FormatSpecifier::Sign, &mut callback);
110+
}
111+
_ => {}
112+
}
113+
114+
// check for `#`
115+
if let Some((_, '#')) = chars.peek() {
116+
skip_char_and_emit(&mut chars, FormatSpecifier::NumberSign, &mut callback);
117+
}
118+
119+
// check for `0`
120+
let mut cloned = chars.clone().take(2);
121+
let first = cloned.next().map(|next| next.1);
122+
let second = cloned.next().map(|next| next.1);
123+
124+
if first == Some('0') && second != Some('$') {
125+
skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
126+
}
127+
128+
// width
129+
match chars.peek().copied().unwrap_or_default().1 {
130+
'0'..='9' => {
131+
read_integer(&mut chars, &mut callback);
132+
if let Some((_, '$')) = chars.peek() {
133+
skip_char_and_emit(
134+
&mut chars,
135+
FormatSpecifier::DollarSign,
136+
&mut callback,
137+
);
138+
}
139+
}
140+
c if c == '_' || c.is_alphabetic() => {
141+
read_identifier(&mut chars, &mut callback);
142+
143+
if chars.peek().map(|&(_, c)| c) == Some('?') {
144+
skip_char_and_emit(
145+
&mut chars,
146+
FormatSpecifier::QuestionMark,
147+
&mut callback,
148+
);
149+
}
150+
151+
// can be either width (indicated by dollar sign, or type in which case
152+
// the next sign has to be `}`)
153+
let next = chars.peek().map(|&(_, c)| c);
154+
155+
match next {
156+
Some('$') => skip_char_and_emit(
157+
&mut chars,
158+
FormatSpecifier::DollarSign,
159+
&mut callback,
160+
),
161+
Some('}') => {
162+
skip_char_and_emit(
163+
&mut chars,
164+
FormatSpecifier::Close,
165+
&mut callback,
166+
);
167+
continue;
168+
}
169+
_ => continue,
170+
};
171+
}
172+
_ => {}
173+
}
174+
175+
// precision
176+
if let Some((_, '.')) = chars.peek() {
177+
skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
178+
179+
match chars.peek().copied().unwrap_or_default().1 {
180+
'*' => {
181+
skip_char_and_emit(
182+
&mut chars,
183+
FormatSpecifier::Asterisk,
184+
&mut callback,
185+
);
186+
}
187+
'0'..='9' => {
188+
read_integer(&mut chars, &mut callback);
189+
if let Some((_, '$')) = chars.peek() {
190+
skip_char_and_emit(
191+
&mut chars,
192+
FormatSpecifier::DollarSign,
193+
&mut callback,
194+
);
195+
}
196+
}
197+
c if c == '_' || c.is_alphabetic() => {
198+
read_identifier(&mut chars, &mut callback);
199+
if chars.peek().map(|&(_, c)| c) != Some('$') {
200+
continue;
201+
}
202+
skip_char_and_emit(
203+
&mut chars,
204+
FormatSpecifier::DollarSign,
205+
&mut callback,
206+
);
207+
}
208+
_ => {
209+
continue;
210+
}
211+
}
212+
}
213+
214+
// type
215+
match chars.peek().copied().unwrap_or_default().1 {
216+
'?' => {
217+
skip_char_and_emit(
218+
&mut chars,
219+
FormatSpecifier::QuestionMark,
220+
&mut callback,
221+
);
222+
}
223+
c if c == '_' || c.is_alphabetic() => {
224+
read_identifier(&mut chars, &mut callback);
225+
226+
if chars.peek().map(|&(_, c)| c) == Some('?') {
227+
skip_char_and_emit(
228+
&mut chars,
229+
FormatSpecifier::QuestionMark,
230+
&mut callback,
231+
);
232+
}
233+
}
234+
_ => {}
235+
}
236+
}
237+
238+
if let Some((_, '}')) = chars.peek() {
239+
skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
240+
}
241+
continue;
242+
}
243+
}
244+
245+
fn skip_char_and_emit<I, F>(
246+
chars: &mut std::iter::Peekable<I>,
247+
emit: FormatSpecifier,
248+
callback: &mut F,
249+
) where
250+
I: Iterator<Item = (TextRange, char)>,
251+
F: FnMut(TextRange, FormatSpecifier),
252+
{
253+
let (range, _) = chars.next().unwrap();
254+
callback(range, emit);
255+
}
256+
257+
fn read_integer<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
258+
where
259+
I: Iterator<Item = (TextRange, char)>,
260+
F: FnMut(TextRange, FormatSpecifier),
261+
{
262+
let (mut range, c) = chars.next().unwrap();
263+
assert!(c.is_ascii_digit());
264+
while let Some(&(r, next_char)) = chars.peek() {
265+
if next_char.is_ascii_digit() {
266+
chars.next();
267+
range = range.cover(r);
268+
} else {
269+
break;
270+
}
271+
}
272+
callback(range, FormatSpecifier::Integer);
273+
}
274+
275+
fn read_identifier<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
276+
where
277+
I: Iterator<Item = (TextRange, char)>,
278+
F: FnMut(TextRange, FormatSpecifier),
279+
{
280+
let (mut range, c) = chars.next().unwrap();
281+
assert!(c.is_alphabetic() || c == '_');
282+
while let Some(&(r, next_char)) = chars.peek() {
283+
if next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
284+
chars.next();
285+
range = range.cover(r);
286+
} else {
287+
break;
288+
}
289+
}
290+
callback(range, FormatSpecifier::Identifier);
291+
}
292+
}

crates/syntax/src/ast.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,7 @@ pub use self::{
2525
SlicePatComponents, StructKind, TypeBoundKind, VisibilityKind,
2626
},
2727
operators::{ArithOp, BinaryOp, CmpOp, LogicOp, Ordering, RangeOp, UnaryOp},
28-
token_ext::{
29-
CommentKind, CommentPlacement, CommentShape, FormatSpecifier, HasFormatSpecifier, IsString,
30-
QuoteOffsets, Radix,
31-
},
28+
token_ext::{CommentKind, CommentPlacement, CommentShape, IsString, QuoteOffsets, Radix},
3229
traits::{
3330
DocCommentIter, HasArgList, HasAttrs, HasDocComments, HasGenericParams, HasLoopBody,
3431
HasModuleItem, HasName, HasTypeBounds, HasVisibility,

0 commit comments

Comments
 (0)