Validate and transcribe raw strings via unescape module
This commit is contained in:
parent
08ede49dcb
commit
cab7e7fe76
5 changed files with 63 additions and 50 deletions
|
@ -1086,10 +1086,12 @@ impl<'a> StringReader<'a> {
|
|||
Ok(TokenKind::lit(token::Str, symbol, suffix))
|
||||
}
|
||||
'r' => {
|
||||
let (kind, symbol) = self.scan_raw_string();
|
||||
let (start, end, hash_count) = self.scan_raw_string();
|
||||
let symbol = self.name_from_to(start, end);
|
||||
self.validate_raw_str_escape(start, end);
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
|
||||
Ok(TokenKind::lit(kind, symbol, suffix))
|
||||
Ok(TokenKind::lit(token::StrRaw(hash_count), symbol, suffix))
|
||||
}
|
||||
'-' => {
|
||||
if self.nextch_is('>') {
|
||||
|
@ -1243,7 +1245,7 @@ impl<'a> StringReader<'a> {
|
|||
id
|
||||
}
|
||||
|
||||
fn scan_raw_string(&mut self) -> (token::LitKind, Symbol) {
|
||||
fn scan_raw_string(&mut self) -> (BytePos, BytePos, u16) {
|
||||
let start_bpos = self.pos;
|
||||
self.bump();
|
||||
let mut hash_count: u16 = 0;
|
||||
|
@ -1273,7 +1275,6 @@ impl<'a> StringReader<'a> {
|
|||
self.bump();
|
||||
let content_start_bpos = self.pos;
|
||||
let mut content_end_bpos;
|
||||
let mut valid = true;
|
||||
'outer: loop {
|
||||
match self.ch {
|
||||
None => {
|
||||
|
@ -1289,29 +1290,14 @@ impl<'a> StringReader<'a> {
|
|||
}
|
||||
break;
|
||||
}
|
||||
Some(c) => {
|
||||
if c == '\r' && !self.nextch_is('\n') {
|
||||
let last_bpos = self.pos;
|
||||
self.err_span_(start_bpos,
|
||||
last_bpos,
|
||||
"bare CR not allowed in raw string, use \\r \
|
||||
instead");
|
||||
valid = false;
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
self.bump();
|
||||
}
|
||||
|
||||
self.bump();
|
||||
|
||||
let symbol = if valid {
|
||||
self.name_from_to(content_start_bpos, content_end_bpos)
|
||||
} else {
|
||||
Symbol::intern("??")
|
||||
};
|
||||
|
||||
(token::StrRaw(hash_count), symbol)
|
||||
(content_start_bpos, content_end_bpos, hash_count)
|
||||
}
|
||||
|
||||
fn scan_raw_byte_string(&mut self) -> (token::LitKind, Symbol) {
|
||||
|
@ -1421,6 +1407,23 @@ impl<'a> StringReader<'a> {
|
|||
});
|
||||
}
|
||||
|
||||
fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
|
||||
self.with_str_from_to(content_start, content_end, |lit: &str| {
|
||||
unescape::unescape_raw_str(lit, &mut |range, c| {
|
||||
if let Err(err) = c {
|
||||
emit_unescape_error(
|
||||
&self.sess.span_diagnostic,
|
||||
lit,
|
||||
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
|
||||
unescape::Mode::Str,
|
||||
range,
|
||||
err,
|
||||
)
|
||||
}
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
|
||||
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
||||
unescape::unescape_byte_str(lit, &mut |range, c| {
|
||||
|
|
|
@ -4,7 +4,8 @@ use crate::ast::{self, Lit, LitKind};
|
|||
use crate::parse::parser::Parser;
|
||||
use crate::parse::PResult;
|
||||
use crate::parse::token::{self, Token, TokenKind};
|
||||
use crate::parse::unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte};
|
||||
use crate::parse::unescape::{unescape_str, unescape_byte_str, unescape_raw_str};
|
||||
use crate::parse::unescape::{unescape_char, unescape_byte};
|
||||
use crate::print::pprust;
|
||||
use crate::symbol::{kw, sym, Symbol};
|
||||
use crate::tokenstream::{TokenStream, TokenTree};
|
||||
|
@ -141,7 +142,17 @@ impl LitKind {
|
|||
// Ditto.
|
||||
let s = symbol.as_str();
|
||||
let symbol = if s.contains('\r') {
|
||||
Symbol::intern(&raw_str_lit(&s))
|
||||
let mut buf = String::with_capacity(s.len());
|
||||
let mut error = Ok(());
|
||||
unescape_raw_str(&s, &mut |_, unescaped_char| {
|
||||
match unescaped_char {
|
||||
Ok(c) => buf.push(c),
|
||||
Err(_) => error = Err(LitError::LexerError),
|
||||
}
|
||||
});
|
||||
error?;
|
||||
buf.shrink_to_fit();
|
||||
Symbol::intern(&buf)
|
||||
} else {
|
||||
symbol
|
||||
};
|
||||
|
@ -350,29 +361,6 @@ crate fn expect_no_suffix(diag: &Handler, sp: Span, kind: &str, suffix: Option<S
|
|||
}
|
||||
}
|
||||
|
||||
/// Parses a string representing a raw string literal into its final form. The
|
||||
/// only operation this does is convert embedded CRLF into a single LF.
|
||||
fn raw_str_lit(lit: &str) -> String {
|
||||
debug!("raw_str_lit: {:?}", lit);
|
||||
let mut res = String::with_capacity(lit.len());
|
||||
|
||||
let mut chars = lit.chars().peekable();
|
||||
while let Some(c) = chars.next() {
|
||||
if c == '\r' {
|
||||
if *chars.peek().unwrap() != '\n' {
|
||||
panic!("lexer accepted bare CR");
|
||||
}
|
||||
chars.next();
|
||||
res.push('\n');
|
||||
} else {
|
||||
res.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
res.shrink_to_fit();
|
||||
res
|
||||
}
|
||||
|
||||
// Checks if `s` looks like i32 or u1234 etc.
|
||||
fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
|
||||
s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit())
|
||||
|
|
|
@ -66,6 +66,28 @@ where
|
|||
})
|
||||
}
|
||||
|
||||
/// Takes a contents of a string literal (without quotes) and produces a
|
||||
/// sequence of characters or errors.
|
||||
/// NOTE: Raw strings do not perform any explicit character escaping, here we
|
||||
/// only translate CRLF to LF and produce errors on bare CR.
|
||||
pub(crate) fn unescape_raw_str<F>(literal_text: &str, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||
{
|
||||
let mut byte_offset: usize = 0;
|
||||
|
||||
let mut chars = literal_text.chars().peekable();
|
||||
while let Some(curr) = chars.next() {
|
||||
let result = match (curr, chars.peek()) {
|
||||
('\r', Some('\n')) => Ok(curr),
|
||||
('\r', _) => Err(EscapeError::BareCarriageReturn),
|
||||
_ => Ok(curr),
|
||||
};
|
||||
callback(byte_offset..(byte_offset + curr.len_utf8()), result);
|
||||
byte_offset += curr.len_utf8();
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub(crate) enum Mode {
|
||||
Char,
|
||||
|
|
|
@ -21,7 +21,7 @@ fn main() {
|
|||
let _s = "foo
bar"; //~ ERROR: bare CR not allowed in string
|
||||
|
||||
// the following string literal has a bare CR in it
|
||||
let _s = r"bar
foo"; //~ ERROR: bare CR not allowed in raw string
|
||||
let _s = r"bar
foo"; //~ ERROR: bare CR not allowed in string
|
||||
|
||||
// the following string literal has a bare CR in it
|
||||
let _s = "foo\
bar"; //~ ERROR: unknown character escape: \r
|
||||
|
|
|
@ -28,11 +28,11 @@ error: bare CR not allowed in string, use \r instead
|
|||
LL | let _s = "foo
bar";
|
||||
| ^
|
||||
|
||||
error: bare CR not allowed in raw string, use \r instead
|
||||
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:24:14
|
||||
error: bare CR not allowed in string, use \r instead
|
||||
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:24:19
|
||||
|
|
||||
LL | let _s = r"bar
foo";
|
||||
| ^^^^^
|
||||
| ^
|
||||
|
||||
error: unknown character escape: \r
|
||||
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:27:19
|
||||
|
|
Loading…
Add table
Reference in a new issue