Skip to content

Commit c56db92

Browse files
committed
Finish implementing char validation
1 parent d1b2422 commit c56db92

File tree

3 files changed

+93
-8
lines changed

3 files changed

+93
-8
lines changed

crates/ra_syntax/src/string_lexing/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ mod tests {
219219

220220
#[test]
221221
fn test_unicode_escapes() {
222-
let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", ""];
222+
let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
223223
for escape in unicode_escapes {
224224
let escape_sequence = format!(r"'\u{}'", escape);
225225
let component = closed_char_component(&escape_sequence);

crates/ra_syntax/src/validation.rs

Lines changed: 76 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::u32;
2+
13
use crate::{
24
algo::visit::{visitor_ctx, VisitorCtx},
35
ast::{self, AstNode},
@@ -42,15 +44,82 @@ fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) {
4244
}
4345
}
4446
AsciiCodeEscape => {
45-
// TODO:
46-
// * First digit is octal
47-
// * Second digit is hex
47+
// A complete AsciiCodeEscape is exactly 4 chars long: `\x` followed by two hex digits, e.g. `\x7F`
48+
if text.len() < 4 {
49+
errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
50+
} else {
51+
assert!(text.chars().count() == 4, "AsciiCodeEscape cannot be longer than 4 chars");
52+
53+
match u8::from_str_radix(&text[2..], 16) {
54+
Ok(code) if code < 128 => { /* Escape code is valid */ },
55+
Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
56+
Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
57+
}
58+
59+
}
4860
}
4961
UnicodeEscape => {
50-
// TODO:
51-
// * Only hex digits or underscores allowed
52-
// * Max 6 chars
53-
// * Within allowed range (must be at most 10FFFF)
62+
assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
63+
64+
if text.len() == 2 {
65+
// No starting `{`
66+
errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
67+
return;
68+
}
69+
70+
if text.len() == 3 {
71+
// Only one byte follows `\u` (presumably the opening `{`), so the escape cannot be closed
72+
errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
73+
return;
74+
}
75+
76+
let mut code = String::new();
77+
let mut closed = false;
78+
for c in text[3..].chars() {
79+
assert!(!closed, "no characters after escape is closed");
80+
81+
if c.is_digit(16) {
82+
code.push(c);
83+
} else if c == '_' {
84+
// Reject a leading `_`: underscores are only accepted after at least one hex digit
85+
if code.len() == 0 {
86+
errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
87+
return;
88+
}
89+
} else if c == '}' {
90+
closed = true;
91+
} else {
92+
errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
93+
return;
94+
}
95+
}
96+
97+
if !closed {
98+
errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
99+
}
100+
101+
if code.len() == 0 {
102+
errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
103+
return;
104+
}
105+
106+
if code.len() > 6 {
107+
errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
108+
}
109+
110+
match u32::from_str_radix(&code, 16) {
111+
Ok(code_u32) if code_u32 > 0x10FFFF => {
112+
errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
113+
}
114+
Ok(_) => {
115+
// Valid escape code
116+
}
117+
Err(_) => {
118+
errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
119+
}
120+
}
121+
122+
// FIXME: we really need tests for this
54123
}
55124
// Code points are always valid
56125
CodePoint => (),

crates/ra_syntax/src/yellow/syntax_error.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ pub enum SyntaxErrorKind {
6969
LongChar,
7070
EmptyAsciiEscape,
7171
InvalidAsciiEscape,
72+
TooShortAsciiCodeEscape,
73+
AsciiCodeEscapeOutOfRange,
74+
MalformedAsciiCodeEscape,
75+
UnclosedUnicodeEscape,
76+
MalformedUnicodeEscape,
77+
EmptyUnicodeEcape,
78+
OverlongUnicodeEscape,
79+
UnicodeEscapeOutOfRange,
7280
}
7381

7482
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -83,6 +91,14 @@ impl fmt::Display for SyntaxErrorKind {
8391
EmptyChar => write!(f, "Empty char literal"),
8492
UnclosedChar => write!(f, "Unclosed char literal"),
8593
LongChar => write!(f, "Char literal should be one character long"),
94+
TooShortAsciiCodeEscape => write!(f, "Escape sequence should have two digits"),
95+
AsciiCodeEscapeOutOfRange => write!(f, "Escape sequence should be between \\x00 and \\x7F"),
96+
MalformedAsciiCodeEscape => write!(f, "Escape sequence should be a hexadecimal number"),
97+
UnclosedUnicodeEscape => write!(f, "Missing `}}`"),
98+
MalformedUnicodeEscape => write!(f, "Malformed unicode escape sequence"),
99+
EmptyUnicodeEcape => write!(f, "Empty unicode escape sequence"),
100+
OverlongUnicodeEscape => write!(f, "Unicode escape sequence should have at most 6 digits"),
101+
UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"),
86102
ParseError(msg) => write!(f, "{}", msg.0),
87103
}
88104
}

0 commit comments

Comments
 (0)