Skip to content

Commit 780616e

Browse files
committed
proc_macro: Validate inputs to Punct::new and Ident::new
1 parent f116ab6 commit 780616e

12 files changed

+203
-22
lines changed

src/libproc_macro/lib.rs

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@ use std::str::FromStr;
5555
use syntax::ast;
5656
use syntax::errors::DiagnosticBuilder;
5757
use syntax::parse::{self, token};
58-
use syntax::symbol::Symbol;
58+
use syntax::symbol::{keywords, Symbol};
5959
use syntax::tokenstream;
60-
use syntax::parse::lexer::comments;
60+
use syntax::parse::lexer::{self, comments};
6161
use syntax_pos::{FileMap, Pos, SyntaxContext, FileName};
6262
use syntax_pos::hygiene::Mark;
6363

@@ -700,16 +700,13 @@ impl fmt::Display for Group {
700700
/// Multicharacter operators like `+=` are represented as two instances of `Punct` with different
701701
/// forms of `Spacing` returned.
702702
///
703-
/// REVIEW We should guarantee that `Punct` contains a valid punctuation character permitted by
704-
/// REVIEW the language and not a random unicode code point. The check is already performed in
705-
/// REVIEW `TokenTree::to_internal`, but we should do it on construction.
706-
/// REVIEW `Punct` can also avoid using `char` internally and keep an u8-like enum.
707-
///
708703
/// REVIEW ATTENTION: `Copy` impl on a struct with private fields.
709704
/// REVIEW Do we want to guarantee `Punct` to be `Copy`?
710705
#[unstable(feature = "proc_macro", issue = "38356")]
711706
#[derive(Copy, Clone, Debug)]
712707
pub struct Punct {
708+
// REVIEW(INTERNAL) `Punct` can avoid using `char` internally and
709+
// REVIEW(INTERNAL) can keep u8 or an u8-like enum.
713710
ch: char,
714711
spacing: Spacing,
715712
span: Span,
@@ -733,17 +730,21 @@ pub enum Spacing {
733730

734731
impl Punct {
735732
/// Creates a new `Punct` from the given character and spacing.
733+
/// The `ch` argument must be a valid punctuation character permitted by the language,
734+
/// otherwise the function will panic.
736735
///
737736
/// The returned `Punct` will have the default span of `Span::call_site()`
738737
/// which can be further configured with the `set_span` method below.
739738
///
740739
/// REVIEW Why we even use `char` here? There's no reason to use unicode here.
741740
/// REVIEW I guess because it's more convenient to write `new('+')` than `new(b'+')`, that's ok.
742-
///
743-
/// REVIEW TO_DO Do input validation on construction, the argument should be a valid punctuation
744-
/// REVIEW character permitted by the language.
745741
#[unstable(feature = "proc_macro", issue = "38356")]
746742
pub fn new(ch: char, spacing: Spacing) -> Punct {
743+
const LEGAL_CHARS: &[char] = &['=', '<', '>', '!', '~', '+', '-', '*', '/', '%',
744+
'^', '&', '|', '@', '.', ',', ';', ':', '#', '$', '?'];
745+
if !LEGAL_CHARS.contains(&ch) {
746+
panic!("unsupported character `{:?}`", ch)
747+
}
747748
Punct {
748749
ch: ch,
749750
spacing: spacing,
@@ -794,9 +795,6 @@ impl fmt::Display for Punct {
794795

795796
/// An identifier (`ident`) or lifetime identifier (`'ident`).
796797
///
797-
/// REVIEW We should guarantee that `Ident` contains a valid identifier permitted by
798-
/// REVIEW the language and not a random unicode string, at least for a start.
799-
///
800798
/// REVIEW ATTENTION: `Copy` impl on a struct with private fields.
801799
/// REVIEW Do we want to guarantee `Ident` to be `Copy`?
802800
#[derive(Copy, Clone, Debug)]
@@ -816,6 +814,8 @@ impl !Sync for Ident {}
816814
impl Ident {
817815
/// Creates a new `Ident` with the given `string` as well as the specified
818816
/// `span`.
817+
/// The `string` argument must be a valid identifier or lifetime identifier permitted by the
818+
/// language, otherwise the function will panic.
819819
///
820820
/// Note that `span`, currently in rustc, configures the hygiene information
821821
/// for this identifier.
@@ -831,11 +831,11 @@ impl Ident {
831831
///
832832
/// Due to the current importance of hygiene this constructor, unlike other
833833
/// tokens, requires a `Span` to be specified at construction.
834-
///
835-
/// REVIEW TO_DO Do input validation, the argument should be a valid identifier or
836-
/// REVIEW lifetime identifier.
837834
#[unstable(feature = "proc_macro", issue = "38356")]
838835
pub fn new(string: &str, span: Span) -> Ident {
836+
if !lexer::is_valid_ident(string) {
837+
panic!("`{:?}` is not a valid identifier", string)
838+
}
839839
Ident {
840840
sym: Symbol::intern(string),
841841
span,
@@ -847,6 +847,11 @@ impl Ident {
847847
#[unstable(feature = "proc_macro", issue = "38356")]
848848
pub fn new_raw(string: &str, span: Span) -> Ident {
849849
let mut ident = Ident::new(string, span);
850+
if ident.sym == keywords::Underscore.name() ||
851+
token::is_path_segment_keyword(ast::Ident::with_empty_ctxt(ident.sym)) ||
852+
ident.sym.as_str().starts_with("\'") {
853+
panic!("`{:?}` is not a valid raw identifier", string)
854+
}
850855
ident.is_raw = true;
851856
ident
852857
}
@@ -1365,7 +1370,7 @@ impl TokenTree {
13651370
#[unstable(feature = "proc_macro_internals", issue = "27812")]
13661371
#[doc(hidden)]
13671372
pub mod __internal {
1368-
pub use quote::{LiteralKind, Quoter, unquote};
1373+
pub use quote::{LiteralKind, SpannedSymbol, Quoter, unquote};
13691374

13701375
use std::cell::Cell;
13711376

src/libproc_macro/quote.rs

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use {Delimiter, Literal, Spacing, Span, Ident, Punct, Group, TokenStream, TokenT
1818

1919
use syntax::ext::base::{ExtCtxt, ProcMacro};
2020
use syntax::parse::token;
21+
use syntax::symbol::Symbol;
2122
use syntax::tokenstream;
2223

2324
pub struct Quoter;
@@ -195,14 +196,32 @@ impl Quote for Span {
195196

196197
macro_rules! literals {
197198
($($i:ident),*; $($raw:ident),*) => {
199+
pub struct SpannedSymbol {
200+
sym: Symbol,
201+
span: Span,
202+
}
203+
204+
impl SpannedSymbol {
205+
pub fn new(string: &str, span: Span) -> SpannedSymbol {
206+
SpannedSymbol { sym: Symbol::intern(string), span }
207+
}
208+
}
209+
210+
impl Quote for SpannedSymbol {
211+
fn quote(self) -> TokenStream {
212+
quote!(::__internal::SpannedSymbol::new((quote self.sym.as_str()),
213+
(quote self.span)))
214+
}
215+
}
216+
198217
pub enum LiteralKind {
199218
$($i,)*
200219
$($raw(u16),)*
201220
}
202221

203222
impl LiteralKind {
204-
pub fn with_contents_and_suffix(self, contents: Ident, suffix: Option<Ident>)
205-
-> Literal {
223+
pub fn with_contents_and_suffix(self, contents: SpannedSymbol,
224+
suffix: Option<SpannedSymbol>) -> Literal {
206225
let sym = contents.sym;
207226
let suffix = suffix.map(|t| t.sym);
208227
match self {
@@ -225,13 +244,14 @@ macro_rules! literals {
225244
}
226245

227246
impl Literal {
228-
fn kind_contents_and_suffix(self) -> (LiteralKind, Ident, Option<Ident>) {
247+
fn kind_contents_and_suffix(self) -> (LiteralKind, SpannedSymbol, Option<SpannedSymbol>)
248+
{
229249
let (kind, contents) = match self.lit {
230250
$(token::Lit::$i(contents) => (LiteralKind::$i, contents),)*
231251
$(token::Lit::$raw(contents, n) => (LiteralKind::$raw(n), contents),)*
232252
};
233-
let suffix = self.suffix.map(|sym| Ident::new(&sym.as_str(), self.span()));
234-
(kind, Ident::new(&contents.as_str(), self.span()), suffix)
253+
let suffix = self.suffix.map(|sym| SpannedSymbol::new(&sym.as_str(), self.span()));
254+
(kind, SpannedSymbol::new(&contents.as_str(), self.span()), suffix)
235255
}
236256
}
237257

src/libsyntax/parse/lexer/mod.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1770,6 +1770,15 @@ fn ident_continue(c: Option<char>) -> bool {
17701770
(c > '\x7f' && c.is_xid_continue())
17711771
}
17721772

1773+
// The string is a valid identifier or a lifetime identifier.
1774+
pub fn is_valid_ident(s: &str) -> bool {
1775+
let mut chars = s.chars();
1776+
match chars.next() {
1777+
Some('\'') => ident_start(chars.next()) && chars.all(|ch| ident_continue(Some(ch))),
1778+
ch => ident_start(ch) && chars.all(|ch| ident_continue(Some(ch)))
1779+
}
1780+
}
1781+
17731782
#[cfg(test)]
17741783
mod tests {
17751784
use super::*;
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// force-host
12+
// no-prefer-dynamic
13+
14+
#![feature(proc_macro)]
15+
#![crate_type = "proc-macro"]
16+
17+
extern crate proc_macro;
18+
use proc_macro::*;
19+
20+
#[proc_macro]
21+
pub fn invalid_punct(_: TokenStream) -> TokenStream {
22+
TokenTree::from(Punct::new('`', Spacing::Alone)).into()
23+
}
24+
25+
#[proc_macro]
26+
pub fn invalid_ident(_: TokenStream) -> TokenStream {
27+
TokenTree::from(Ident::new("*", Span::call_site())).into()
28+
}
29+
30+
#[proc_macro]
31+
pub fn invalid_raw_ident(_: TokenStream) -> TokenStream {
32+
TokenTree::from(Ident::new_raw("self", Span::call_site())).into()
33+
}
34+
35+
#[proc_macro]
36+
pub fn lexer_failure(_: TokenStream) -> TokenStream {
37+
"a b ) c".parse().expect("parsing failed without panic")
38+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// aux-build:invalid-punct-ident.rs
12+
#![feature(proc_macro)]
13+
#[macro_use]
14+
extern crate invalid_punct_ident;
15+
16+
invalid_punct!(); //~ ERROR proc macro panicked
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
error: proc macro panicked
2+
--> $DIR/invalid-punct-ident-1.rs:16:1
3+
|
4+
LL | invalid_punct!(); //~ ERROR proc macro panicked
5+
| ^^^^^^^^^^^^^^^^^
6+
|
7+
= help: message: unsupported character `'`'`
8+
9+
error: aborting due to previous error
10+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// aux-build:invalid-punct-ident.rs
12+
#![feature(proc_macro)]
13+
#[macro_use]
14+
extern crate invalid_punct_ident;
15+
16+
invalid_ident!(); //~ ERROR proc macro panicked
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
error: proc macro panicked
2+
--> $DIR/invalid-punct-ident-2.rs:16:1
3+
|
4+
LL | invalid_ident!(); //~ ERROR proc macro panicked
5+
| ^^^^^^^^^^^^^^^^^
6+
|
7+
= help: message: `"*"` is not a valid identifier
8+
9+
error: aborting due to previous error
10+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// aux-build:invalid-punct-ident.rs
12+
#![feature(proc_macro)]
13+
#[macro_use]
14+
extern crate invalid_punct_ident;
15+
16+
invalid_raw_ident!(); //~ ERROR proc macro panicked
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
error: proc macro panicked
2+
--> $DIR/invalid-punct-ident-3.rs:16:1
3+
|
4+
LL | invalid_raw_ident!(); //~ ERROR proc macro panicked
5+
| ^^^^^^^^^^^^^^^^^^^^^
6+
|
7+
= help: message: `"self"` is not a valid raw identifier
8+
9+
error: aborting due to previous error
10+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// aux-build:invalid-punct-ident.rs
12+
#![feature(proc_macro)]
13+
#[macro_use]
14+
extern crate invalid_punct_ident;
15+
16+
lexer_failure!(); //~ ERROR proc macro panicked
17+
//~| ERROR unexpected close delimiter: `)`
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
error: unexpected close delimiter: `)`
2+
--> $DIR/invalid-punct-ident-4.rs:16:1
3+
|
4+
LL | lexer_failure!(); //~ ERROR proc macro panicked
5+
| ^^^^^^^^^^^^^^^^^
6+
7+
error: proc macro panicked
8+
--> $DIR/invalid-punct-ident-4.rs:16:1
9+
|
10+
LL | lexer_failure!(); //~ ERROR proc macro panicked
11+
| ^^^^^^^^^^^^^^^^^
12+
13+
error: aborting due to 2 previous errors
14+

0 commit comments

Comments
 (0)