Skip to content

Commit ba27e2d

Browse files
committed
Update to rustc-literal-escaper 0.0.4 for a better API without `unreachable` and faster string parsing
1 parent 015c777 commit ba27e2d

File tree

22 files changed

+264
-271
lines changed

22 files changed

+264
-271
lines changed

Cargo.lock

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3166,9 +3166,7 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
31663166

31673167
[[package]]
31683168
name = "rustc-literal-escaper"
3169-
version = "0.0.2"
3170-
source = "registry+https://github.com/rust-lang/crates.io-index"
3171-
checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04"
3169+
version = "0.0.4"
31723170

31733171
[[package]]
31743172
name = "rustc-main"

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,6 @@ codegen-units = 1
8989
# FIXME: LTO cannot be enabled for binaries in a workspace
9090
# <https://github.com/rust-lang/cargo/issues/9330>
9191
# lto = true
92+
93+
[patch.crates-io]
94+
rustc-literal-escaper = { path = '../literal-escaper/' }

compiler/rustc_ast/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ edition = "2024"
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
99
memchr = "2.7.4"
10-
rustc-literal-escaper = "0.0.2"
10+
rustc-literal-escaper = "0.0.4"
1111
rustc_ast_ir = { path = "../rustc_ast_ir" }
1212
rustc_data_structures = { path = "../rustc_data_structures" }
1313
rustc_index = { path = "../rustc_index" }

compiler/rustc_ast/src/util/literal.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use std::{ascii, fmt, str};
44

55
use rustc_literal_escaper::{
6-
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
6+
MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str,
77
};
88
use rustc_span::{Span, Symbol, kw, sym};
99
use tracing::debug;
@@ -87,11 +87,10 @@ impl LitKind {
8787
// Force-inlining here is aggressive but the closure is
8888
// called on every char in the string, so it can be hot in
8989
// programs with many long strings containing escapes.
90-
unescape_unicode(
90+
unescape_str(
9191
s,
92-
Mode::Str,
93-
&mut #[inline(always)]
94-
|_, c| match c {
92+
#[inline(always)]
93+
|_, res| match res {
9594
Ok(c) => buf.push(c),
9695
Err(err) => {
9796
assert!(!err.is_fatal(), "failed to unescape string literal")
@@ -111,8 +110,8 @@ impl LitKind {
111110
token::ByteStr => {
112111
let s = symbol.as_str();
113112
let mut buf = Vec::with_capacity(s.len());
114-
unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c {
115-
Ok(c) => buf.push(byte_from_char(c)),
113+
unescape_byte_str(s, |_, res| match res {
114+
Ok(b) => buf.push(b),
116115
Err(err) => {
117116
assert!(!err.is_fatal(), "failed to unescape string literal")
118117
}
@@ -128,7 +127,7 @@ impl LitKind {
128127
token::CStr => {
129128
let s = symbol.as_str();
130129
let mut buf = Vec::with_capacity(s.len());
131-
unescape_mixed(s, Mode::CStr, &mut |_span, c| match c {
130+
unescape_c_str(s, |_span, c| match c {
132131
Ok(MixedUnit::Char(c)) => {
133132
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
134133
}

compiler/rustc_parse/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ edition = "2024"
66
[dependencies]
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
9-
rustc-literal-escaper = "0.0.2"
9+
rustc-literal-escaper = "0.0.4"
1010
rustc_ast = { path = "../rustc_ast" }
1111
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
1212
rustc_data_structures = { path = "../rustc_data_structures" }

compiler/rustc_parse/src/lexer/mod.rs

Lines changed: 32 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::ops::Range;
2-
31
use diagnostics::make_unclosed_delims_error;
42
use rustc_ast::ast::{self, AttrStyle};
53
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
@@ -10,7 +8,7 @@ use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
108
use rustc_lexer::{
119
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
1210
};
13-
use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode};
11+
use rustc_literal_escaper::{EscapeError, Mode, check_for_errors};
1412
use rustc_session::lint::BuiltinLintDiag;
1513
use rustc_session::lint::builtin::{
1614
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
@@ -702,7 +700,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
702700
}
703701
err.emit()
704702
}
705-
self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
703+
self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
706704
}
707705
rustc_lexer::LiteralKind::Byte { terminated } => {
708706
if !terminated {
@@ -714,7 +712,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
714712
.with_code(E0763)
715713
.emit()
716714
}
717-
self.cook_unicode(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
715+
self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
718716
}
719717
rustc_lexer::LiteralKind::Str { terminated } => {
720718
if !terminated {
@@ -726,7 +724,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
726724
.with_code(E0765)
727725
.emit()
728726
}
729-
self.cook_unicode(token::Str, Mode::Str, start, end, 1, 1) // " "
727+
self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
730728
}
731729
rustc_lexer::LiteralKind::ByteStr { terminated } => {
732730
if !terminated {
@@ -738,7 +736,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
738736
.with_code(E0766)
739737
.emit()
740738
}
741-
self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
739+
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1)
740+
// b" "
742741
}
743742
rustc_lexer::LiteralKind::CStr { terminated } => {
744743
if !terminated {
@@ -750,13 +749,14 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
750749
.with_code(E0767)
751750
.emit()
752751
}
753-
self.cook_mixed(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
752+
self.cook_quoted(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
754753
}
755754
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
756755
if let Some(n_hashes) = n_hashes {
757756
let n = u32::from(n_hashes);
758757
let kind = token::StrRaw(n_hashes);
759-
self.cook_unicode(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
758+
self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n)
759+
// r##" "##
760760
} else {
761761
self.report_raw_str_error(start, 1);
762762
}
@@ -765,7 +765,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
765765
if let Some(n_hashes) = n_hashes {
766766
let n = u32::from(n_hashes);
767767
let kind = token::ByteStrRaw(n_hashes);
768-
self.cook_unicode(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
768+
self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n)
769+
// br##" "##
769770
} else {
770771
self.report_raw_str_error(start, 2);
771772
}
@@ -774,7 +775,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
774775
if let Some(n_hashes) = n_hashes {
775776
let n = u32::from(n_hashes);
776777
let kind = token::CStrRaw(n_hashes);
777-
self.cook_unicode(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
778+
self.cook_quoted(kind, Mode::RawCStr, start, end, 3 + n, 1 + n)
779+
// cr##" "##
778780
} else {
779781
self.report_raw_str_error(start, 2);
780782
}
@@ -1091,40 +1093,36 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
10911093
self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
10921094
}
10931095

1094-
fn cook_common(
1096+
fn cook_quoted(
10951097
&self,
10961098
mut kind: token::LitKind,
10971099
mode: Mode,
10981100
start: BytePos,
10991101
end: BytePos,
11001102
prefix_len: u32,
11011103
postfix_len: u32,
1102-
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
11031104
) -> (token::LitKind, Symbol) {
11041105
let content_start = start + BytePos(prefix_len);
11051106
let content_end = end - BytePos(postfix_len);
11061107
let lit_content = self.str_from_to(content_start, content_end);
1107-
unescape(lit_content, mode, &mut |range, result| {
1108-
// Here we only check for errors. The actual unescaping is done later.
1109-
if let Err(err) = result {
1110-
let span_with_quotes = self.mk_sp(start, end);
1111-
let (start, end) = (range.start as u32, range.end as u32);
1112-
let lo = content_start + BytePos(start);
1113-
let hi = lo + BytePos(end - start);
1114-
let span = self.mk_sp(lo, hi);
1115-
let is_fatal = err.is_fatal();
1116-
if let Some(guar) = emit_unescape_error(
1117-
self.dcx(),
1118-
lit_content,
1119-
span_with_quotes,
1120-
span,
1121-
mode,
1122-
range,
1123-
err,
1124-
) {
1125-
assert!(is_fatal);
1126-
kind = token::Err(guar);
1127-
}
1108+
check_for_errors(lit_content, mode, |range, err| {
1109+
let span_with_quotes = self.mk_sp(start, end);
1110+
let (start, end) = (range.start as u32, range.end as u32);
1111+
let lo = content_start + BytePos(start);
1112+
let hi = lo + BytePos(end - start);
1113+
let span = self.mk_sp(lo, hi);
1114+
let is_fatal = err.is_fatal();
1115+
if let Some(guar) = emit_unescape_error(
1116+
self.dcx(),
1117+
lit_content,
1118+
span_with_quotes,
1119+
span,
1120+
mode,
1121+
range,
1122+
err,
1123+
) {
1124+
assert!(is_fatal);
1125+
kind = token::Err(guar);
11281126
}
11291127
});
11301128

@@ -1137,34 +1135,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
11371135
};
11381136
(kind, sym)
11391137
}
1140-
1141-
fn cook_unicode(
1142-
&self,
1143-
kind: token::LitKind,
1144-
mode: Mode,
1145-
start: BytePos,
1146-
end: BytePos,
1147-
prefix_len: u32,
1148-
postfix_len: u32,
1149-
) -> (token::LitKind, Symbol) {
1150-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
1151-
unescape_unicode(src, mode, &mut |span, result| callback(span, result.map(drop)))
1152-
})
1153-
}
1154-
1155-
fn cook_mixed(
1156-
&self,
1157-
kind: token::LitKind,
1158-
mode: Mode,
1159-
start: BytePos,
1160-
end: BytePos,
1161-
prefix_len: u32,
1162-
postfix_len: u32,
1163-
) -> (token::LitKind, Symbol) {
1164-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
1165-
unescape_mixed(src, mode, &mut |span, result| callback(span, result.map(drop)))
1166-
})
1167-
}
11681138
}
11691139

11701140
pub fn nfc_normalize(string: &str) -> Symbol {

compiler/rustc_parse_format/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ edition = "2024"
55

66
[dependencies]
77
# tidy-alphabetical-start
8-
rustc-literal-escaper = "0.0.2"
8+
rustc-literal-escaper = "0.0.4"
99
rustc_lexer = { path = "../rustc_lexer" }
1010
# tidy-alphabetical-end
1111

compiler/rustc_parse_format/src/lib.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ use std::ops::Range;
2020
pub use Alignment::*;
2121
pub use Count::*;
2222
pub use Position::*;
23-
use rustc_literal_escaper::{Mode, unescape_unicode};
2423

2524
/// The type of format string that we are parsing.
2625
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
@@ -320,7 +319,7 @@ impl<'input> Parser<'input> {
320319
let without_quotes = &snippet[1..snippet.len() - 1];
321320
let (mut ok, mut vec) = (true, vec![]);
322321
let mut chars = input.chars();
323-
unescape_unicode(without_quotes, Mode::Str, &mut |range, res| match res {
322+
rustc_literal_escaper::unescape_str(without_quotes, |range, res| match res {
324323
Ok(ch) if ok && chars.next().is_some_and(|c| ch == c) => {
325324
vec.push((range, ch));
326325
}

compiler/rustc_proc_macro/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ test = false
1515
doctest = false
1616

1717
[dependencies]
18-
rustc-literal-escaper = "0.0.2"
18+
rustc-literal-escaper = "0.0.4"
1919

2020
[features]
2121
rustc-dep-of-std = []

library/Cargo.lock

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,9 @@ dependencies = [
283283

284284
[[package]]
285285
name = "rustc-literal-escaper"
286-
version = "0.0.2"
287-
source = "registry+https://github.com/rust-lang/crates.io-index"
288-
checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04"
286+
version = "0.0.4"
289287
dependencies = [
288+
"rustc-std-workspace-core",
290289
"rustc-std-workspace-std",
291290
]
292291

library/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,4 @@ rustc-std-workspace-core = { path = 'rustc-std-workspace-core' }
5151
rustc-std-workspace-alloc = { path = 'rustc-std-workspace-alloc' }
5252
rustc-std-workspace-std = { path = 'rustc-std-workspace-std' }
5353
compiler_builtins = { path = "compiler-builtins/compiler-builtins" }
54+
rustc-literal-escaper = { path = '../../literal-escaper/' }

library/proc_macro/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ std = { path = "../std" }
99
# `core` when resolving doc links. Without this line a different `core` will be
1010
# loaded from sysroot causing duplicate lang items and other similar errors.
1111
core = { path = "../core" }
12-
rustc-literal-escaper = { version = "0.0.2", features = ["rustc-dep-of-std"] }
12+
rustc-literal-escaper = { version = "0.0.4", features = ["rustc-dep-of-std"] }
1313

1414
[features]
1515
default = ["rustc-dep-of-std"]

library/proc_macro/src/lib.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ use std::{error, fmt};
5555
pub use diagnostic::{Diagnostic, Level, MultiSpan};
5656
#[unstable(feature = "proc_macro_value", issue = "136652")]
5757
pub use rustc_literal_escaper::EscapeError;
58-
use rustc_literal_escaper::{MixedUnit, Mode, byte_from_char, unescape_mixed, unescape_unicode};
58+
use rustc_literal_escaper::{MixedUnit, unescape_byte_str, unescape_c_str, unescape_str};
5959
#[unstable(feature = "proc_macro_totokens", issue = "130977")]
6060
pub use to_tokens::ToTokens;
6161

@@ -1439,10 +1439,9 @@ impl Literal {
14391439
// Force-inlining here is aggressive but the closure is
14401440
// called on every char in the string, so it can be hot in
14411441
// programs with many long strings containing escapes.
1442-
unescape_unicode(
1442+
unescape_str(
14431443
symbol,
1444-
Mode::Str,
1445-
&mut #[inline(always)]
1444+
#[inline(always)]
14461445
|_, c| match c {
14471446
Ok(c) => buf.push(c),
14481447
Err(err) => {
@@ -1471,7 +1470,7 @@ impl Literal {
14711470
let mut error = None;
14721471
let mut buf = Vec::with_capacity(symbol.len());
14731472

1474-
unescape_mixed(symbol, Mode::CStr, &mut |_span, c| match c {
1473+
unescape_c_str(symbol, |_span, c| match c {
14751474
Ok(MixedUnit::Char(c)) => {
14761475
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
14771476
}
@@ -1510,8 +1509,8 @@ impl Literal {
15101509
let mut buf = Vec::with_capacity(symbol.len());
15111510
let mut error = None;
15121511

1513-
unescape_unicode(symbol, Mode::ByteStr, &mut |_, c| match c {
1514-
Ok(c) => buf.push(byte_from_char(c)),
1512+
unescape_byte_str(symbol, |_, res| match res {
1513+
Ok(b) => buf.push(b),
15151514
Err(err) => {
15161515
if err.is_fatal() {
15171516
error = Some(ConversionErrorKind::FailedToUnescape(err));

src/tools/clippy/clippy_dev/src/update_lints.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::utils::{
22
ErrAction, File, FileUpdater, RustSearcher, Token, UpdateMode, UpdateStatus, expect_action, update_text_region_fn,
33
};
44
use itertools::Itertools;
5+
use rustc_lexer::{LiteralKind, TokenKind, tokenize};
56
use std::collections::HashSet;
67
use std::fmt::Write;
78
use std::ops::Range;
@@ -342,7 +343,7 @@ fn parse_str_lit(s: &str) -> String {
342343
.and_then(|s| s.strip_suffix('"'))
343344
.unwrap_or_else(|| panic!("expected quoted string, found `{s}`"));
344345
let mut res = String::with_capacity(s.len());
345-
rustc_literal_escaper::unescape_unicode(s, mode, &mut |_, ch| {
346+
rustc_literal_escaper::unescape_str(s, |range, ch| {
346347
if let Ok(ch) = ch {
347348
res.push(ch);
348349
}

src/tools/lint-docs/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ description = "A script to extract the lint documentation for the rustc book."
77
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
88

99
[dependencies]
10-
rustc-literal-escaper = "0.0.2"
10+
rustc-literal-escaper = "0.0.4"
1111
serde_json = "1.0.57"
1212
tempfile = "3.1.0"
1313
walkdir = "2.3.1"

0 commit comments

Comments
 (0)