Skip to content

Commit b3b4140

Browse files
committed
Reserve guarded string literals (RFC 3593)
1 parent a8a88fe commit b3b4140

19 files changed

+633
-10
lines changed

compiler/rustc_lexer/src/cursor.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use std::str::Chars;
44
///
55
/// Next characters can be peeked via `first` method,
66
/// and position can be shifted forward via `bump` method.
7+
#[derive(Clone)]
78
pub struct Cursor<'a> {
89
len_remaining: usize,
910
/// Iterator over chars. Slightly faster than a &str.

compiler/rustc_lexer/src/lib.rs

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -175,24 +175,27 @@ pub enum DocStyle {
175175
/// `rustc_ast::ast::LitKind`).
176176
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
177177
pub enum LiteralKind {
178-
/// "12_u8", "0o100", "0b120i99", "1f32".
178+
/// `12_u8`, `0o100`, `0b120i99`, `1f32`.
179179
Int { base: Base, empty_int: bool },
180-
/// "12.34f32", "1e3", but not "1f32".
180+
/// `12.34f32`, `1e3`, but not `1f32`.
181181
Float { base: Base, empty_exponent: bool },
182-
/// "'a'", "'\\'", "'''", "';"
182+
/// `'a'`, `'\\'`, `'''`, `';`
183183
Char { terminated: bool },
184-
/// "b'a'", "b'\\'", "b'''", "b';"
184+
/// `b'a'`, `b'\\'`, `b'''`, `b';`
185185
Byte { terminated: bool },
186-
/// ""abc"", ""abc"
186+
/// `"abc"`, `"abc`
187187
Str { terminated: bool },
188-
/// "b"abc"", "b"abc"
188+
/// `b"abc"`, `b"abc`
189189
ByteStr { terminated: bool },
190190
/// `c"abc"`, `c"abc`
191191
CStr { terminated: bool },
192-
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
192+
/// `#"abc"#`, `####"ab"###"c"####`, `#"a`, `##"a"#`. `None` indicates
193+
/// that the number of opening `#`s does not fit in a `u8`.
194+
GuardedStr { n_hashes: Option<u8> },
195+
/// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
193196
/// an invalid literal.
194197
RawStr { n_hashes: Option<u8> },
195-
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
198+
/// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None`
196199
/// indicates an invalid literal.
197200
RawByteStr { n_hashes: Option<u8> },
198201
/// `cr"abc"`, `cr#"abc"#`, `cr#"a`. `None` indicates an invalid literal.
@@ -361,6 +364,36 @@ impl Cursor<'_> {
361364
_ => self.ident_or_unknown_prefix(),
362365
},
363366

367+
// Guarded string literal (reserved syntax).
368+
'#' if matches!(self.first(), '"' | '#') => {
369+
// Create a backup to restore later if this
370+
// turns out to not be a guarded literal.
371+
let backup = self.clone();
372+
373+
let mut n_start_hashes: u32 = 1; // Already captured one `#`.
374+
while self.first() == '#' {
375+
n_start_hashes += 1;
376+
self.bump();
377+
}
378+
379+
if self.first() == '"' {
380+
self.bump();
381+
382+
let res = self.guarded_double_quoted_string(n_start_hashes);
383+
let suffix_start = self.pos_within_token();
384+
if res.is_ok() {
385+
self.eat_literal_suffix();
386+
}
387+
let kind = GuardedStr { n_hashes: n_start_hashes.try_into().ok() };
388+
Literal { kind, suffix_start }
389+
} else {
390+
// Not a guarded string, so restore old state.
391+
*self = backup;
392+
// Return a pound token.
393+
Pound
394+
}
395+
}
396+
364397
// Byte literal, byte string literal, raw byte string literal or identifier.
365398
'b' => self.c_or_byte_string(
366399
|terminated| ByteStr { terminated },
@@ -754,6 +787,36 @@ impl Cursor<'_> {
754787
false
755788
}
756789

790+
/// Eats the double-quoted string plus up to `n_start_hashes` closing `#`s and returns the number of closing `#`s eaten, or an error if the string has no terminator.
791+
fn guarded_double_quoted_string(&mut self, n_start_hashes: u32) -> Result<u32, RawStrError> {
792+
debug_assert!(self.prev() == '"');
793+
794+
// Lex the string itself as a normal string literal
795+
// so we can recover that for older editions later.
796+
if !self.double_quoted_string() {
797+
return Err(RawStrError::NoTerminator {
798+
expected: n_start_hashes,
799+
found: 0,
800+
possible_terminator_offset: None,
801+
});
802+
}
803+
804+
// Check that amount of closing '#' symbols
805+
// is equal to the amount of opening ones.
806+
// Note that this will not consume extra trailing `#` characters:
807+
// `###"abcde"####` is lexed as a `GuardedStr { n_hashes: 3 }`
808+
// followed by a `#` token.
809+
let mut n_end_hashes = 0;
810+
while self.first() == '#' && n_end_hashes < n_start_hashes {
811+
n_end_hashes += 1;
812+
self.bump();
813+
}
814+
815+
// We could handle `n_end_hashes < n_start_hashes` here
816+
// but this whole token is an error anyways.
817+
Ok(n_end_hashes)
818+
}
819+
757820
/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
758821
fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
759822
// Wrap the actual function to handle the error with too many hashes.

compiler/rustc_lint/src/context/diagnostics.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,20 @@ pub(super) fn builtin(sess: &Session, diagnostic: BuiltinLintDiag, diag: &mut Di
155155
Applicability::MachineApplicable,
156156
);
157157
}
158+
BuiltinLintDiag::ReservedGuardedString(space_span) => {
159+
if let Some(space_span) = space_span {
160+
diag.span_suggestion_verbose(
161+
space_span,
162+
"insert whitespace here to avoid this being parsed as guarded string in Rust 2024",
163+
" ",
164+
Applicability::MachineApplicable,
165+
);
166+
} else {
167+
diag.help(
168+
"insert whitespace between the `#`s and the opening quote to avoid this being parsed as guarded string in Rust 2024",
169+
);
170+
}
171+
}
158172
BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => {
159173
diag.span_note(
160174
invoc_span,

compiler/rustc_lint_defs/src/builtin.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ declare_lint_pass! {
8888
RUST_2021_INCOMPATIBLE_OR_PATTERNS,
8989
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
9090
RUST_2021_PRELUDE_COLLISIONS,
91+
RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
9192
SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
9293
SINGLE_USE_LIFETIMES,
9394
SOFT_UNSTABLE,
@@ -4704,3 +4705,43 @@ declare_lint! {
47044705
};
47054706
crate_level_only
47064707
}
4708+
4709+
declare_lint! {
4710+
/// The `rust_2024_guarded_string_incompatible_syntax` lint detects `#` tokens
4711+
/// that will be parsed as part of a guarded string literal in Rust 2024.
4712+
///
4713+
/// ### Example
4714+
///
4715+
/// ```rust,edition2021,compile_fail
4716+
/// #![deny(rust_2024_guarded_string_incompatible_syntax)]
4717+
///
4718+
/// macro_rules! m {
4719+
/// (# $x:expr #) => ();
4720+
/// (# $x:expr) => ();
4721+
/// }
4722+
///
4723+
/// m!(#"hey"#);
4724+
/// m!(#"hello");
4725+
/// ```
4726+
///
4727+
/// {{produces}}
4728+
///
4729+
/// ### Explanation
4730+
///
4731+
/// Prior to Rust 2024, `#"hey"#` is three tokens: the first `#`
4732+
/// followed by the string literal `"hey"` then the final `#`.
4733+
/// In Rust 2024, the whole sequence is considered a single token.
4734+
///
4735+
/// This lint suggests adding whitespace between the leading `#`
4736+
/// and the string to keep them separated in Rust 2024.
4737+
// Allow this lint -- rustdoc doesn't yet support threading edition into this lint's parser.
4738+
#[allow(rustdoc::invalid_rust_codeblocks)]
4739+
pub RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
4740+
Allow,
4741+
"will be parsed as a guarded string in Rust 2024",
4742+
@future_incompatible = FutureIncompatibleInfo {
4743+
reason: FutureIncompatibilityReason::EditionError(Edition::Edition2024),
4744+
reference: "issue #123735 <https://github.com/rust-lang/rust/issues/123735>",
4745+
};
4746+
crate_level_only
4747+
}

compiler/rustc_lint_defs/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,7 @@ pub enum BuiltinLintDiag {
591591
ProcMacroBackCompat(String),
592592
OrPatternsBackCompat(Span, String),
593593
ReservedPrefix(Span),
594+
ReservedGuardedString(Option<Span>),
594595
TrailingMacro(bool, Ident),
595596
BreakWithLabelAndLoop(Span),
596597
NamedAsmLabel(String),

compiler/rustc_parse/messages.ftl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,10 @@ parse_require_colon_after_labeled_expression = labeled expression must be follow
666666
.label = the label
667667
.suggestion = add `:` after the label
668668
669+
parse_reserved_guarded_string = invalid string literal
670+
.note = unprefixed guarded string literals are reserved for future use since Rust 2024
671+
.suggestion_whitespace = consider inserting whitespace here
672+
669673
parse_return_types_use_thin_arrow = return types are denoted using `->`
670674
.suggestion = use `->` instead
671675

compiler/rustc_parse/src/errors.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,6 +2000,24 @@ pub enum UnknownPrefixSugg {
20002000
},
20012001
}
20022002

2003+
#[derive(Diagnostic)]
2004+
#[diag(parse_reserved_guarded_string)]
2005+
#[note]
2006+
pub struct ReservedGuardedString {
2007+
#[primary_span]
2008+
pub span: Span,
2009+
#[subdiagnostic]
2010+
pub sugg: Option<GuardedStringSugg>,
2011+
}
2012+
#[derive(Subdiagnostic)]
2013+
#[suggestion(
2014+
parse_suggestion_whitespace,
2015+
code = " ",
2016+
applicability = "maybe-incorrect",
2017+
style = "verbose"
2018+
)]
2019+
pub struct GuardedStringSugg(#[primary_span] pub Span);
2020+
20032021
#[derive(Diagnostic)]
20042022
#[diag(parse_too_many_hashes)]
20052023
pub struct TooManyHashes {

compiler/rustc_parse/src/lexer/mod.rs

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ use rustc_lexer::unescape::{self, EscapeError, Mode};
1212
use rustc_lexer::{Base, DocStyle, RawStrError};
1313
use rustc_lexer::{Cursor, LiteralKind};
1414
use rustc_session::lint::builtin::{
15-
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
15+
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
16+
TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
1617
};
1718
use rustc_session::lint::BuiltinLintDiag;
1819
use rustc_session::parse::ParseSess;
@@ -240,6 +241,40 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
240241
let prefix_span = self.mk_sp(start, lit_start);
241242
return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
242243
}
244+
rustc_lexer::TokenKind::Literal {
245+
kind: rustc_lexer::LiteralKind::GuardedStr { n_hashes },
246+
suffix_start: _
247+
} if !self.mk_sp(start, self.pos).edition().at_least_rust_2024() => {
248+
// Check if previous char was `#`, so we don't
249+
// lint for each `#` before the string.
250+
if !(
251+
start > self.start_pos &&
252+
self.str_from_to(start - BytePos(1), start) == "#"
253+
) {
254+
let span = self.mk_sp(start, self.pos);
255+
let space_span = n_hashes.map(|n_hashes| {
256+
let space_pos = start + BytePos(n_hashes.into());
257+
self.mk_sp(space_pos, space_pos)
258+
});
259+
260+
// Before Rust 2024, only emit a lint for migration.
261+
self.psess.buffer_lint_with_diagnostic(
262+
RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
263+
span,
264+
ast::CRATE_NODE_ID,
265+
"will be parsed as a guarded string in Rust 2024",
266+
BuiltinLintDiag::ReservedGuardedString(space_span),
267+
);
268+
}
269+
270+
// reset the state so that only the first `#` was consumed.
271+
let next = start + BytePos(1);
272+
self.pos = next;
273+
self.cursor = Cursor::new(&str_before[1..]);
274+
275+
let pound_span = self.mk_sp(start, next);
276+
return (Token::new(TokenKind::Pound, pound_span), preceded_by_whitespace);
277+
}
243278
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
244279
let suffix_start = start + BytePos(suffix_start);
245280
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
@@ -487,6 +522,32 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
487522
self.report_raw_str_error(start, 1);
488523
}
489524
}
525+
// RFC 3593 reserved this syntax for future use. As of Rust 2024,
526+
// using this syntax produces an error. In earlier editions, however, it
527+
// only results in an (allowed by default) lint, and is treated as
528+
// separate tokens.
529+
rustc_lexer::LiteralKind::GuardedStr { n_hashes } => {
530+
let span = self.mk_sp(start, self.pos);
531+
532+
if let Some(n_hashes) = n_hashes {
533+
let n = u32::from(n_hashes);
534+
let expn_data = span.ctxt().outer_expn_data();
535+
536+
let space_pos = start + BytePos(n);
537+
let space_span = self.mk_sp(space_pos, space_pos);
538+
539+
let sugg = if expn_data.is_root() {
540+
Some(errors::GuardedStringSugg(space_span))
541+
} else {
542+
None
543+
};
544+
545+
self.dcx().emit_err(errors::ReservedGuardedString { span, sugg });
546+
self.cook_unicode(token::Str, Mode::Str, start, end, 1 + n, 1 + n) // ##" "##
547+
} else {
548+
self.dcx().emit_fatal(errors::ReservedGuardedString { span, sugg: None });
549+
}
550+
}
490551
rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
491552
if let Some(n_hashes) = n_hashes {
492553
let n = u32::from(n_hashes);

src/librustdoc/html/highlight.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -850,7 +850,8 @@ impl<'src> Classifier<'src> {
850850
| LiteralKind::RawStr { .. }
851851
| LiteralKind::RawByteStr { .. }
852852
| LiteralKind::CStr { .. }
853-
| LiteralKind::RawCStr { .. } => Class::String,
853+
| LiteralKind::RawCStr { .. }
854+
| LiteralKind::GuardedStr { .. } => Class::String,
854855
// Number literals.
855856
LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
856857
},
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//@ force-host
2+
//@ edition:2021
3+
//@ no-prefer-dynamic
4+
5+
#![crate_type = "proc-macro"]
6+
7+
extern crate proc_macro;
8+
9+
use proc_macro::TokenStream;
10+
use std::str::FromStr;
11+
12+
#[proc_macro]
13+
pub fn number_of_tokens_in_a_guarded_string_literal(_: TokenStream) -> TokenStream {
14+
TokenStream::from_str("#\"abc\"#").unwrap().into_iter().count().to_string().parse().unwrap()
15+
}
16+
17+
#[proc_macro]
18+
pub fn number_of_tokens_in_a_guarded_unterminated_string_literal(_: TokenStream) -> TokenStream {
19+
TokenStream::from_str("#\"abc\"").unwrap().into_iter().count().to_string().parse().unwrap()
20+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//@ force-host
2+
//@ compile-flags: -Zunstable-options
3+
//@ edition:2024
4+
//@ no-prefer-dynamic
5+
6+
#![crate_type = "proc-macro"]
7+
8+
extern crate proc_macro;
9+
10+
use proc_macro::TokenStream;
11+
use std::str::FromStr;
12+
13+
#[proc_macro]
14+
pub fn number_of_tokens_in_a_guarded_string_literal(_: TokenStream) -> TokenStream {
15+
TokenStream::from_str("#\"abc\"#").unwrap().into_iter().count().to_string().parse().unwrap()
16+
}
17+
18+
#[proc_macro]
19+
pub fn number_of_tokens_in_a_guarded_unterminated_string_literal(_: TokenStream) -> TokenStream {
20+
TokenStream::from_str("#\"abc\"").unwrap().into_iter().count().to_string().parse().unwrap()
21+
}

0 commit comments

Comments
 (0)