diff --git a/crates/ra_syntax/src/ast/generated.rs b/crates/ra_syntax/src/ast/generated.rs index 5b5f71ee7db..2e9ae263a98 100644 --- a/crates/ra_syntax/src/ast/generated.rs +++ b/crates/ra_syntax/src/ast/generated.rs @@ -3236,6 +3236,43 @@ impl<'a> AstNode<'a> for Stmt<'a> { impl<'a> Stmt<'a> {} +// String +#[derive(Debug, Clone, Copy,)] +pub struct StringNode = OwnedRoot> { + pub(crate) syntax: SyntaxNode, +} +pub type String<'a> = StringNode>; + +impl, R2: TreeRoot> PartialEq> for StringNode { + fn eq(&self, other: &StringNode) -> bool { self.syntax == other.syntax } +} +impl> Eq for StringNode {} +impl> Hash for StringNode { + fn hash(&self, state: &mut H) { self.syntax.hash(state) } +} + +impl<'a> AstNode<'a> for String<'a> { + fn cast(syntax: SyntaxNodeRef<'a>) -> Option { + match syntax.kind() { + STRING => Some(String { syntax }), + _ => None, + } + } + fn syntax(self) -> SyntaxNodeRef<'a> { self.syntax } +} + +impl> StringNode { + pub fn borrowed(&self) -> String { + StringNode { syntax: self.syntax.borrowed() } + } + pub fn owned(&self) -> StringNode { + StringNode { syntax: self.syntax.owned() } + } +} + + +impl<'a> String<'a> {} + // StructDef #[derive(Debug, Clone, Copy,)] pub struct StructDefNode = OwnedRoot> { diff --git a/crates/ra_syntax/src/ast/mod.rs b/crates/ra_syntax/src/ast/mod.rs index 6b0d62610c3..f20714ede6d 100644 --- a/crates/ra_syntax/src/ast/mod.rs +++ b/crates/ra_syntax/src/ast/mod.rs @@ -1,6 +1,7 @@ mod generated; use std::marker::PhantomData; +use std::string::String as RustString; use itertools::Itertools; @@ -76,7 +77,7 @@ pub trait DocCommentsOwner<'a>: AstNode<'a> { /// Returns the textual content of a doc comment block as a single string. /// That is, strips leading `///` and joins lines - fn doc_comment_text(self) -> String { + fn doc_comment_text(self) -> RustString { self.doc_comments() .map(|comment| { let prefix = comment.prefix(); @@ -133,6 +134,12 @@ impl<'a> Char<'a> { } } +impl<'a> String<'a> { + pub fn text(&self) -> &SmolStr { + &self.syntax().leaf_text().unwrap() + } +} + impl<'a> Comment<'a> { pub fn text(&self) -> &SmolStr { self.syntax().leaf_text().unwrap() diff --git a/crates/ra_syntax/src/grammar.ron b/crates/ra_syntax/src/grammar.ron index a928444156e..c3184667e85 100644 --- a/crates/ra_syntax/src/grammar.ron +++ b/crates/ra_syntax/src/grammar.ron @@ -411,6 +411,7 @@ Grammar( "PrefixExpr": (), "RangeExpr": (), "BinExpr": (), + "String": (), "Char": (), "Literal": (), diff --git a/crates/ra_syntax/src/string_lexing/mod.rs b/crates/ra_syntax/src/string_lexing.rs similarity index 72% rename from crates/ra_syntax/src/string_lexing/mod.rs rename to crates/ra_syntax/src/string_lexing.rs index cc53e0abac7..d613bb0429a 100644 --- a/crates/ra_syntax/src/string_lexing/mod.rs +++ b/crates/ra_syntax/src/string_lexing.rs @@ -1,6 +1,68 @@ use self::CharComponentKind::*; use rowan::{TextRange, TextUnit}; +pub fn parse_string_literal(src: &str) -> StringComponentIterator { + StringComponentIterator { + parser: Parser::new(src), + has_closing_quote: false, + } +} + +#[derive(Debug, Eq, PartialEq, Clone)] +pub struct StringComponent { + pub range: TextRange, + pub kind: StringComponentKind, +} + +impl StringComponent { + fn new(range: TextRange, kind: StringComponentKind) -> StringComponent { + StringComponent { range, kind } + } +} + +#[derive(Debug, Eq, PartialEq, Clone)] +pub enum StringComponentKind { + IgnoreNewline, + Char(CharComponentKind), +} + +pub struct StringComponentIterator<'a> { + parser: Parser<'a>, + pub has_closing_quote: bool, +} + +impl<'a> Iterator for StringComponentIterator<'a> { + type Item = StringComponent; + fn next(&mut self) -> Option { + if self.parser.pos == 0 { + assert!( + self.parser.advance() == '"', + "string literal should start with double quotes" + ); + } + + if let Some(component) = self.parser.parse_string_component() { + return Some(component); + } + + // We get here when there are no char components left to parse + if self.parser.peek() == Some('"') { + self.parser.advance(); + self.has_closing_quote = true; + } + + assert!( + self.parser.peek() == None, + "string literal should leave no unparsed input: src = {}, pos = {}, length = {}", + self.parser.src, + self.parser.pos, + self.parser.src.len() + ); + + None + } +} + pub fn parse_char_literal(src: &str) -> CharComponentIterator { CharComponentIterator { parser: Parser::new(src), @@ -93,6 +155,12 @@ impl<'a> Parser<'a> { next } + pub fn skip_whitespace(&mut self) { + while self.peek().map(|c| c.is_whitespace()) == Some(true) { + self.advance(); + } + } + pub fn get_pos(&self) -> TextUnit { (self.pos as u32).into() } @@ -172,6 +240,51 @@ impl<'a> Parser<'a> { )) } } + + pub fn parse_ignore_newline(&mut self, start: TextUnit) -> Option { + // In string literals, when a `\` occurs immediately before the newline, the `\`, + // the newline, and all whitespace at the beginning of the next line are ignored + match self.peek() { + Some('\n') | Some('\r') => { + self.skip_whitespace(); + Some(StringComponent::new( + TextRange::from_to(start, self.get_pos()), + StringComponentKind::IgnoreNewline, + )) + } + _ => None, + } + } + + pub fn parse_string_component(&mut self) -> Option { + let next = self.peek()?; + + // Ignore string close + if next == '"' { + return None; + } + + let start = self.get_pos(); + self.advance(); + + if next == '\\' { + // Strings can use `\` to ignore newlines, so we first try to parse one of those + // before falling back to parsing char escapes + self.parse_ignore_newline(start).or_else(|| { + let char_component = self.parse_escape(start); + Some(StringComponent::new( + char_component.range, + StringComponentKind::Char(char_component.kind), + )) + }) + } else { + let end = self.get_pos(); + Some(StringComponent::new( + TextRange::from_to(start, end), + StringComponentKind::Char(CodePoint), + )) + } + } } #[cfg(test)] diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs deleted file mode 100644 index a10b297c0f0..00000000000 --- a/crates/ra_syntax/src/validation.rs +++ /dev/null @@ -1,271 +0,0 @@ -use std::u32; - -use arrayvec::ArrayString; - -use crate::{ - algo::visit::{visitor_ctx, VisitorCtx}, - ast::{self, AstNode}, - SourceFileNode, - string_lexing::{self, CharComponentKind}, - yellow::{ - SyntaxError, - SyntaxErrorKind::*, - }, -}; - -pub(crate) fn validate(file: &SourceFileNode) -> Vec { - let mut errors = Vec::new(); - for node in file.syntax().descendants() { - let _ = visitor_ctx(&mut errors) - .visit::(validate_char) - .accept(node); - } - errors -} - -fn validate_char(node: ast::Char, errors: &mut Vec) { - let mut components = string_lexing::parse_char_literal(node.text()); - let mut len = 0; - for component in &mut components { - len += 1; - - // Validate escapes - let text = &node.text()[component.range]; - let range = component.range + node.syntax().range().start(); - use self::CharComponentKind::*; - match component.kind { - AsciiEscape => { - if text.len() == 1 { - // Escape sequence consists only of leading `\` - errors.push(SyntaxError::new(EmptyAsciiEscape, range)); - } else { - let escape_code = text.chars().skip(1).next().unwrap(); - if !is_ascii_escape(escape_code) { - errors.push(SyntaxError::new(InvalidAsciiEscape, range)); - } - } - } - AsciiCodeEscape => { - // An AsciiCodeEscape has 4 chars, example: `\xDD` - if text.len() < 4 { - errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range)); - } else { - assert!( - text.chars().count() == 4, - "AsciiCodeEscape cannot be longer than 4 chars" - ); - - match u8::from_str_radix(&text[2..], 16) { - Ok(code) if code < 128 => { /* Escape code is valid */ } - Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)), - Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)), - } - } - } - UnicodeEscape => { - assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u"); - - if text.len() == 2 { - // No starting `{` - errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); - return; - } - - if text.len() == 3 { - // Only starting `{` - errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)); - return; - } - - let mut code = ArrayString::<[_; 6]>::new(); - let mut closed = false; - for c in text[3..].chars() { - assert!(!closed, "no characters after escape is closed"); - - if c.is_digit(16) { - if code.len() == 6 { - errors.push(SyntaxError::new(OverlongUnicodeEscape, range)); - return; - } - - code.push(c); - } else if c == '_' { - // Reject leading _ - if code.len() == 0 { - errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); - return; - } - } else if c == '}' { - closed = true; - } else { - errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); - return; - } - } - - if !closed { - errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)) - } - - if code.len() == 0 { - errors.push(SyntaxError::new(EmptyUnicodeEcape, range)); - return; - } - - match u32::from_str_radix(&code, 16) { - Ok(code_u32) if code_u32 > 0x10FFFF => { - errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range)); - } - Ok(_) => { - // Valid escape code - } - Err(_) => { - errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); - } - } - } - CodePoint => { - // These code points must always be escaped - if text == "\t" || text == "\r" { - errors.push(SyntaxError::new(UnescapedCodepoint, range)); - } - } - } - } - - if !components.has_closing_quote { - errors.push(SyntaxError::new(UnclosedChar, node.syntax().range())); - } - - if len == 0 { - errors.push(SyntaxError::new(EmptyChar, node.syntax().range())); - } - - if len > 1 { - errors.push(SyntaxError::new(LongChar, node.syntax().range())); - } -} - -fn is_ascii_escape(code: char) -> bool { - match code { - '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true, - _ => false, - } -} - -#[cfg(test)] -mod test { - use crate::SourceFileNode; - - fn build_file(literal: &str) -> SourceFileNode { - let src = format!("const C: char = '{}';", literal); - SourceFileNode::parse(&src) - } - - fn assert_valid_char(literal: &str) { - let file = build_file(literal); - assert!( - file.errors().len() == 0, - "Errors for literal '{}': {:?}", - literal, - file.errors() - ); - } - - fn assert_invalid_char(literal: &str) { - let file = build_file(literal); - assert!(file.errors().len() > 0); - } - - #[test] - fn test_ansi_codepoints() { - for byte in 0..=255u8 { - match byte { - b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()), - b'\'' | b'\\' => { /* Ignore character close and backslash */ } - _ => assert_valid_char(&(byte as char).to_string()), - } - } - } - - #[test] - fn test_unicode_codepoints() { - let valid = ["Ƒ", "バ", "メ", "﷽"]; - for c in &valid { - assert_valid_char(c); - } - } - - #[test] - fn test_unicode_multiple_codepoints() { - let invalid = ["नी", "👨‍👨‍"]; - for c in &invalid { - assert_invalid_char(c); - } - } - - #[test] - fn test_valid_ascii_escape() { - let valid = [ - r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b", - ]; - for c in &valid { - assert_valid_char(c); - } - } - - #[test] - fn test_invalid_ascii_escape() { - let invalid = [r"\a", r"\?", r"\"]; - for c in &invalid { - assert_invalid_char(c); - } - } - - #[test] - fn test_valid_ascii_code_escape() { - let valid = [r"\x00", r"\x7F", r"\x55"]; - for c in &valid { - assert_valid_char(c); - } - } - - #[test] - fn test_invalid_ascii_code_escape() { - let invalid = [r"\x", r"\x7", r"\xF0"]; - for c in &invalid { - assert_invalid_char(c); - } - } - - #[test] - fn test_valid_unicode_escape() { - let valid = [ - r"\u{FF}", - r"\u{0}", - r"\u{F}", - r"\u{10FFFF}", - r"\u{1_0__FF___FF_____}", - ]; - for c in &valid { - assert_valid_char(c); - } - } - - #[test] - fn test_invalid_unicode_escape() { - let invalid = [ - r"\u", - r"\u{}", - r"\u{", - r"\u{FF", - r"\u{FFFFFF}", - r"\u{_F}", - r"\u{00FFFFF}", - r"\u{110000}", - ]; - for c in &invalid { - assert_invalid_char(c); - } - } -} diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs new file mode 100644 index 00000000000..63f9bad248a --- /dev/null +++ b/crates/ra_syntax/src/validation/char.rs @@ -0,0 +1,270 @@ +use std::u32; + +use arrayvec::ArrayString; + +use crate::{ + ast::{self, AstNode}, + string_lexing::{self, CharComponentKind}, + TextRange, + yellow::{ + SyntaxError, + SyntaxErrorKind::*, + }, +}; + +pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec) { + let literal_text = node.text(); + let literal_range = node.syntax().range(); + let mut components = string_lexing::parse_char_literal(literal_text); + let mut len = 0; + for component in &mut components { + len += 1; + let text = &literal_text[component.range]; + let range = component.range + literal_range.start(); + validate_char_component(text, component.kind, range, errors); + } + + if !components.has_closing_quote { + errors.push(SyntaxError::new(UnclosedChar, literal_range)); + } + + if len == 0 { + errors.push(SyntaxError::new(EmptyChar, literal_range)); + } + + if len > 1 { + errors.push(SyntaxError::new(OverlongChar, literal_range)); + } +} + +pub(crate) fn validate_char_component( + text: &str, + kind: CharComponentKind, + range: TextRange, + errors: &mut Vec, +) { + // Validate escapes + use self::CharComponentKind::*; + match kind { + AsciiEscape => { + if text.len() == 1 { + // Escape sequence consists only of leading `\` + errors.push(SyntaxError::new(EmptyAsciiEscape, range)); + } else { + let escape_code = text.chars().skip(1).next().unwrap(); + if !is_ascii_escape(escape_code) { + errors.push(SyntaxError::new(InvalidAsciiEscape, range)); + } + } + } + AsciiCodeEscape => { + // An AsciiCodeEscape has 4 chars, example: `\xDD` + if text.len() < 4 { + errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range)); + } else { + assert!( + text.chars().count() == 4, + "AsciiCodeEscape cannot be longer than 4 chars" + ); + + match u8::from_str_radix(&text[2..], 16) { + Ok(code) if code < 128 => { /* Escape code is valid */ } + Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)), + Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)), + } + } + } + UnicodeEscape => { + assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u"); + + if text.len() == 2 { + // No starting `{` + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + return; + } + + if text.len() == 3 { + // Only starting `{` + errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)); + return; + } + + let mut code = ArrayString::<[_; 6]>::new(); + let mut closed = false; + for c in text[3..].chars() { + assert!(!closed, "no characters after escape is closed"); + + if c.is_digit(16) { + if code.len() == 6 { + errors.push(SyntaxError::new(OverlongUnicodeEscape, range)); + return; + } + + code.push(c); + } else if c == '_' { + // Reject leading _ + if code.len() == 0 { + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + return; + } + } else if c == '}' { + closed = true; + } else { + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + return; + } + } + + if !closed { + errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)) + } + + if code.len() == 0 { + errors.push(SyntaxError::new(EmptyUnicodeEcape, range)); + return; + } + + match u32::from_str_radix(&code, 16) { + Ok(code_u32) if code_u32 > 0x10FFFF => { + errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range)); + } + Ok(_) => { + // Valid escape code + } + Err(_) => { + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + } + } + } + CodePoint => { + // These code points must always be escaped + if text == "\t" || text == "\r" { + errors.push(SyntaxError::new(UnescapedCodepoint, range)); + } + } + } +} + +fn is_ascii_escape(code: char) -> bool { + match code { + '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true, + _ => false, + } +} + +#[cfg(test)] +mod test { + use crate::SourceFileNode; + + fn build_file(literal: &str) -> SourceFileNode { + let src = format!("const C: char = '{}';", literal); + SourceFileNode::parse(&src) + } + + fn assert_valid_char(literal: &str) { + let file = build_file(literal); + assert!( + file.errors().len() == 0, + "Errors for literal '{}': {:?}", + literal, + file.errors() + ); + } + + fn assert_invalid_char(literal: &str) { + let file = build_file(literal); + assert!(file.errors().len() > 0); + } + + #[test] + fn test_ansi_codepoints() { + for byte in 0..=255u8 { + match byte { + b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()), + b'\'' | b'\\' => { /* Ignore character close and backslash */ } + _ => assert_valid_char(&(byte as char).to_string()), + } + } + } + + #[test] + fn test_unicode_codepoints() { + let valid = ["Ƒ", "バ", "メ", "﷽"]; + for c in &valid { + assert_valid_char(c); + } + } + + #[test] + fn test_unicode_multiple_codepoints() { + let invalid = ["नी", "👨‍👨‍"]; + for c in &invalid { + assert_invalid_char(c); + } + } + + #[test] + fn test_valid_ascii_escape() { + let valid = [ + r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b", + ]; + for c in &valid { + assert_valid_char(c); + } + } + + #[test] + fn test_invalid_ascii_escape() { + let invalid = [r"\a", r"\?", r"\"]; + for c in &invalid { + assert_invalid_char(c); + } + } + + #[test] + fn test_valid_ascii_code_escape() { + let valid = [r"\x00", r"\x7F", r"\x55"]; + for c in &valid { + assert_valid_char(c); + } + } + + #[test] + fn test_invalid_ascii_code_escape() { + let invalid = [r"\x", r"\x7", r"\xF0"]; + for c in &invalid { + assert_invalid_char(c); + } + } + + #[test] + fn test_valid_unicode_escape() { + let valid = [ + r"\u{FF}", + r"\u{0}", + r"\u{F}", + r"\u{10FFFF}", + r"\u{1_0__FF___FF_____}", + ]; + for c in &valid { + assert_valid_char(c); + } + } + + #[test] + fn test_invalid_unicode_escape() { + let invalid = [ + r"\u", + r"\u{}", + r"\u{", + r"\u{FF", + r"\u{FFFFFF}", + r"\u{_F}", + r"\u{00FFFFF}", + r"\u{110000}", + ]; + for c in &invalid { + assert_invalid_char(c); + } + } +} diff --git a/crates/ra_syntax/src/validation/mod.rs b/crates/ra_syntax/src/validation/mod.rs new file mode 100644 index 00000000000..2ff0bc26d32 --- /dev/null +++ b/crates/ra_syntax/src/validation/mod.rs @@ -0,0 +1,20 @@ +use crate::{ + algo::visit::{visitor_ctx, VisitorCtx}, + ast, + SourceFileNode, + yellow::SyntaxError, +}; + +mod char; +mod string; + +pub(crate) fn validate(file: &SourceFileNode) -> Vec { + let mut errors = Vec::new(); + for node in file.syntax().descendants() { + let _ = visitor_ctx(&mut errors) + .visit::(self::char::validate_char_node) + .visit::(self::string::validate_string_node) + .accept(node); + } + errors +} diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs new file mode 100644 index 00000000000..089879d1530 --- /dev/null +++ b/crates/ra_syntax/src/validation/string.rs @@ -0,0 +1,168 @@ +use crate::{ + ast::{self, AstNode}, + string_lexing::{self, StringComponentKind}, + yellow::{ + SyntaxError, + SyntaxErrorKind::*, + }, +}; + +use super::char; + +pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec) { + let literal_text = node.text(); + let literal_range = node.syntax().range(); + let mut components = string_lexing::parse_string_literal(literal_text); + for component in &mut components { + let range = component.range + literal_range.start(); + + match component.kind { + StringComponentKind::Char(kind) => { + // Chars must escape \t, \n and \r codepoints, but strings don't + let text = &literal_text[component.range]; + match text { + "\t" | "\n" | "\r" => { /* always valid */ } + _ => char::validate_char_component(text, kind, range, errors), + } + } + StringComponentKind::IgnoreNewline => { /* always valid */ } + } + } + + if !components.has_closing_quote { + errors.push(SyntaxError::new(UnclosedString, literal_range)); + } +} + +#[cfg(test)] +mod test { + use crate::SourceFileNode; + + fn build_file(literal: &str) -> SourceFileNode { + let src = format!(r#"const S: &'static str = "{}";"#, literal); + println!("Source: {}", src); + SourceFileNode::parse(&src) + } + + fn assert_valid_str(literal: &str) { + let file = build_file(literal); + assert!( + file.errors().len() == 0, + "Errors for literal '{}': {:?}", + literal, + file.errors() + ); + } + + fn assert_invalid_str(literal: &str) { + let file = build_file(literal); + assert!(file.errors().len() > 0); + } + + #[test] + fn test_ansi_codepoints() { + for byte in 0..=255u8 { + match byte { + b'\"' | b'\\' => { /* Ignore string close and backslash */ } + _ => assert_valid_str(&(byte as char).to_string()), + } + } + } + + #[test] + fn test_unicode_codepoints() { + let valid = ["Ƒ", "バ", "メ", "﷽"]; + for c in &valid { + assert_valid_str(c); + } + } + + #[test] + fn test_unicode_multiple_codepoints() { + let valid = ["नी", "👨‍👨‍"]; + for c in &valid { + assert_valid_str(c); + } + } + + #[test] + fn test_valid_ascii_escape() { + let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"]; + for c in &valid { + assert_valid_str(c); + } + } + + #[test] + fn test_invalid_ascii_escape() { + let invalid = [r"\a", r"\?", r"\"]; + for c in &invalid { + assert_invalid_str(c); + } + } + + #[test] + fn test_valid_ascii_code_escape() { + let valid = [r"\x00", r"\x7F", r"\x55"]; + for c in &valid { + assert_valid_str(c); + } + } + + #[test] + fn test_invalid_ascii_code_escape() { + let invalid = [r"\x", r"\x7", r"\xF0"]; + for c in &invalid { + assert_invalid_str(c); + } + } + + #[test] + fn test_valid_unicode_escape() { + let valid = [ + r"\u{FF}", + r"\u{0}", + r"\u{F}", + r"\u{10FFFF}", + r"\u{1_0__FF___FF_____}", + ]; + for c in &valid { + assert_valid_str(c); + } + } + + #[test] + fn test_invalid_unicode_escape() { + let invalid = [ + r"\u", + r"\u{}", + r"\u{", + r"\u{FF", + r"\u{FFFFFF}", + r"\u{_F}", + r"\u{00FFFFF}", + r"\u{110000}", + ]; + for c in &invalid { + assert_invalid_str(c); + } + } + + #[test] + fn test_mixed() { + assert_valid_str( + r"This is the tale of a string +with a newline in between, some emoji (👨‍👨‍) here and there, +unicode escapes like this: \u{1FFBB} and weird stuff like +this ﷽", + ); + } + + #[test] + fn test_ignore_newline() { + assert_valid_str( + "Hello \ + World", + ); + } +} diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs index c524adf3954..cf7b1d495c8 100644 --- a/crates/ra_syntax/src/yellow/syntax_error.rs +++ b/crates/ra_syntax/src/yellow/syntax_error.rs @@ -71,7 +71,7 @@ pub enum SyntaxErrorKind { UnescapedCodepoint, EmptyChar, UnclosedChar, - LongChar, + OverlongChar, EmptyAsciiEscape, InvalidAsciiEscape, TooShortAsciiCodeEscape, @@ -82,6 +82,7 @@ pub enum SyntaxErrorKind { EmptyUnicodeEcape, OverlongUnicodeEscape, UnicodeEscapeOutOfRange, + UnclosedString, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -96,7 +97,7 @@ impl fmt::Display for SyntaxErrorKind { InvalidAsciiEscape => write!(f, "Invalid escape sequence"), EmptyChar => write!(f, "Empty char literal"), UnclosedChar => write!(f, "Unclosed char literal"), - LongChar => write!(f, "Char literal should be one character long"), + OverlongChar => write!(f, "Char literal should be one character long"), TooShortAsciiCodeEscape => write!(f, "Escape sequence should have two digits"), AsciiCodeEscapeOutOfRange => { write!(f, "Escape sequence should be between \\x00 and \\x7F") @@ -109,6 +110,7 @@ impl fmt::Display for SyntaxErrorKind { write!(f, "Unicode escape sequence should have at most 6 digits") } UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"), + UnclosedString => write!(f, "Unclosed string literal"), ParseError(msg) => write!(f, "{}", msg.0), } }