Add lots of tests

aochagavia · aochagavia · commit 94796e6447c8 · 2018-11-07T11:35:33.000+01:00
diff --git a/crates/ra_syntax/src/lexer/ptr.rs b/crates/ra_syntax/src/lexer/ptr.rs
@@ -30,8 +30,7 @@ impl<'s> Ptr<'s> {
     /// Gets the nth character from the current.
     /// For example, 0 will return the current token, 1 will return the next, etc.
     pub fn nth(&self, n: u32) -> Option<char> {
-        let mut chars = self.chars().peekable();
-        chars.by_ref().nth(n as usize)
+        self.chars().nth(n as usize)
     }
 
     /// Checks whether the current character is `c`.
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs
@@ -121,11 +121,13 @@ fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) {
                         errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
                     }
                 }
-
-                // FIXME: we really need tests for this
             }
-            // Code points are always valid
-            CodePoint => (),
+            CodePoint => {
+                // These code points must always be escaped
+                if text == "\t" || text == "\r" {
+                    errors.push(SyntaxError::new(UnescapedCodepoint, range));
+                }
+            },
         }
     }
 
@@ -148,3 +150,115 @@ fn is_ascii_escape(code: char) -> bool {
         _ => false,
     }
 }
+
+#[cfg(test)]
+mod test {
+    use crate::File;
+
+    /// Wraps `literal` in single quotes inside a `const C: char` item and parses the result.
+    fn build_file(literal: &str) -> File {
+        File::parse(&format!("const C: char = '{}';", literal))
+    }
+
+    /// Asserts that a char literal with the given contents produces no errors.
+    ///
+    /// On failure the panic message includes the literal and the reported errors.
+    fn assert_valid_char(literal: &str) {
+        let file = build_file(literal);
+        // Query the errors once and reuse them for both the check and the message.
+        let errors = file.errors();
+        assert!(errors.is_empty(), "Errors for literal '{}': {:?}", literal, errors);
+    }
+
+    /// Asserts that a char literal with the given contents produces at least one error.
+    ///
+    /// Only the presence of an error is checked, not its kind.
+    fn assert_invalid_char(literal: &str) {
+        // TODO: accept a set of expected `SyntaxErrorKind`s and compare it against the
+        // kinds reported by `file.errors()` (via `SyntaxError::kind`).
+        let file = build_file(literal);
+        assert!(!file.errors().is_empty(), "Expected errors for literal '{}'", literal);
+    }
+
+    /// Checks every code point in `0..=255` when written unescaped inside a char literal.
+    #[test]
+    fn test_ansi_codepoints() {
+        for code in 0..=255u8 {
+            // Ignore the character that closes the literal and the backslash.
+            if code == b'\'' || code == b'\\' {
+                continue;
+            }
+            let literal = char::from(code).to_string();
+            if code == b'\n' || code == b'\r' || code == b'\t' {
+                assert_invalid_char(&literal);
+            } else {
+                assert_valid_char(&literal);
+            }
+        }
+    }
+
+    /// Unescaped non-ASCII characters are expected to be accepted as char literals.
+    #[test]
+    fn test_unicode_codepoints() {
+        let accepted = ["Ƒ", "バ", "メ", "﷽"];
+        for &literal in &accepted {
+            assert_valid_char(literal);
+        }
+    }
+
+    /// Text made of more than one code point must be rejected as a char literal.
+    #[test]
+    fn test_unicode_multiple_codepoints() {
+        let rejected = ["नी", "👨‍👨‍"];
+        rejected.iter().for_each(|literal| assert_invalid_char(literal));
+    }
+
+    /// Escapes and plain characters that must produce no errors.
+    #[test]
+    fn test_valid_ascii_escape() {
+        // Mixes backslash escapes with characters written without one (`"`, `a`, `b`).
+        let accepted = [r"\'", "\"", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b"];
+        for &literal in &accepted {
+            assert_valid_char(literal);
+        }
+    }
+
+    /// Backslash sequences that are expected to be reported as errors.
+    #[test]
+    fn test_invalid_ascii_escape() {
+        // The last case is a lone backslash.
+        let rejected = [r"\a", r"\?", r"\"];
+        for &literal in &rejected {
+            assert_invalid_char(literal);
+        }
+    }
+
+    /// `\x` escapes with two hex digits and a value of at most `7F` must be accepted.
+    #[test]
+    fn test_valid_ascii_code_escape() {
+        [r"\x00", r"\x7F", r"\x55"].iter().for_each(|literal| assert_valid_char(literal));
+    }
+
+    /// `\x` escapes with missing digits or a value above `7F` must be rejected.
+    #[test]
+    fn test_invalid_ascii_code_escape() {
+        // No digits, a single digit, and two digits encoding a value above `7F`.
+        let rejected = [r"\x", r"\x7", r"\xF0"];
+        for &literal in &rejected {
+            assert_invalid_char(literal);
+        }
+    }
+
+    /// `\u{...}` escapes that must produce no errors, including ones with underscores.
+    #[test]
+    fn test_valid_unicode_escape() {
+        let accepted = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
+        for &literal in &accepted {
+            assert_valid_char(literal);
+        }
+    }
+
+    /// `\u` escapes that are expected to be reported as errors.
+    #[test]
+    fn test_invalid_unicode_escape() {
+        let rejected = [
+            r"\u",          // no braces at all
+            r"\u{}",        // braces without digits
+            r"\u{",         // opening brace only
+            r"\u{FF",       // closing brace missing
+            r"\u{FFFFFF}",  // six digits, value above 10FFFF
+            r"\u{_F}",      // starts with an underscore
+            r"\u{00FFFFF}", // seven digits
+            r"\u{110000}",  // one past 10FFFF
+        ];
+        for &literal in &rejected {
+            assert_invalid_char(literal);
+        }
+    }
+}
diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs
@@ -34,6 +34,10 @@ impl SyntaxError {
         }
     }
 
+    /// Returns a clone of this error's kind.
+    pub fn kind(&self) -> SyntaxErrorKind {
+        SyntaxErrorKind::clone(&self.kind)
+    }
+
     pub fn location(&self) -> Location {
         self.location.clone()
     }
@@ -64,6 +68,7 @@ impl fmt::Display for SyntaxError {
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum SyntaxErrorKind {
     ParseError(ParseError),
+    UnescapedCodepoint,
     EmptyChar,
     UnclosedChar,
     LongChar,
@@ -86,6 +91,7 @@ impl fmt::Display for SyntaxErrorKind {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         use self::SyntaxErrorKind::*;
         match self {
+            UnescapedCodepoint => write!(f, "This codepoint should always be escaped"),
             EmptyAsciiEscape => write!(f, "Empty escape sequence"),
             InvalidAsciiEscape => write!(f, "Invalid escape sequence"),
             EmptyChar => write!(f, "Empty char literal"),

Original file line number	Diff line number	Diff line change
`@@ -30,8 +30,7 @@ impl<'s> Ptr<'s> {`
`30`	`30`	`/// Gets the nth character from the current.`
`31`	`31`	`/// For example, 0 will return the current token, 1 will return the next, etc.`
`32`	`32`	`pub fn nth(&self, n: u32) -> Option<char> {`
`33`		`- let mut chars = self.chars().peekable();`
`34`		`- chars.by_ref().nth(n as usize)`
	`33`	`+ self.chars().nth(n as usize)`
`35`	`34`	`}`
`36`	`35`
`37`	`36`	/// Checks whether the current character is `c`.