syntax: support ES6-style unicode escapes
First half of bootstrapping https://github.com/rust-lang/rfcs/pull/446
This commit is contained in:
parent
3a325c666d
commit
2e1a50121e
7 changed files with 169 additions and 8 deletions
|
@ -764,6 +764,15 @@ impl<'a> StringReader<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// SNAP c9f6d69
|
||||
/// Emit a deprecation warning at `sp` for the old fixed-width `\uABCD` and `\U00ABCD12`
/// escapes, followed by a help note that points at the `\u{...}` replacement.
///
/// The only visible call sites are commented out under the same `SNAP c9f6d69` marker,
/// which is why the method carries `#[allow(unused)]`.
/// NOTE(review): presumably the calls get enabled once a snapshot compiler accepts
/// `\u{...}` -- confirm.
#[allow(unused)]
|
||||
fn old_escape_warning(&mut self, sp: Span) {
|
||||
self.span_diagnostic
|
||||
.span_warn(sp, "\\U00ABCD12 and \\uABCD escapes are deprecated");
|
||||
self.span_diagnostic
|
||||
.span_help(sp, "use \\u{ABCD12} escapes instead");
|
||||
}
|
||||
|
||||
/// Scan for a single (possibly escaped) byte or char
|
||||
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
|
||||
/// `start` is the position of `first_source_char`, which is already consumed.
|
||||
|
@ -782,12 +791,24 @@ impl<'a> StringReader<'a> {
|
|||
Some(e) => {
|
||||
return match e {
|
||||
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
|
||||
'x' => self.scan_hex_digits(2u, delim, !ascii_only),
|
||||
'x' => self.scan_byte_escape(delim, !ascii_only),
|
||||
'u' if !ascii_only => {
|
||||
self.scan_hex_digits(4u, delim, false)
|
||||
if self.curr == Some('{') {
|
||||
self.scan_unicode_escape(delim)
|
||||
} else {
|
||||
let res = self.scan_hex_digits(4u, delim, false);
|
||||
// SNAP c9f6d69
|
||||
//let sp = codemap::mk_sp(escaped_pos, self.last_pos);
|
||||
//self.old_escape_warning(sp);
|
||||
res
|
||||
}
|
||||
}
|
||||
'U' if !ascii_only => {
|
||||
self.scan_hex_digits(8u, delim, false)
|
||||
let res = self.scan_hex_digits(8u, delim, false);
|
||||
// SNAP c9f6d69
|
||||
//let sp = codemap::mk_sp(escaped_pos, self.last_pos);
|
||||
//self.old_escape_warning(sp);
|
||||
res
|
||||
}
|
||||
'\n' if delim == '"' => {
|
||||
self.consume_whitespace();
|
||||
|
@ -848,6 +869,56 @@ impl<'a> StringReader<'a> {
|
|||
true
|
||||
}
|
||||
|
||||
/// Scan over a \u{...} escape
|
||||
///
|
||||
/// At this point the `\` and the `u` have already been consumed and `{` is the current
|
||||
/// character. Reads at least one and at most six hex digits, then passes over the `}`.
///
/// `delim` is compared against each non-hex character found inside the braces: meeting it
/// there is reported as an unterminated escape instead of as an illegal character.
///
/// Every failure is reported through `fatal_span_` / `fatal_span_char`.
/// NOTE(review): the `None` arm below only type-checks if `fatal_span_` diverges, so
/// presumably this function only ever returns `true` -- confirm.
|
||||
fn scan_unicode_escape(&mut self, delim: char) -> bool {
|
||||
self.bump(); // past the {
|
||||
// Position recorded right after bumping past `{`; start of the span used in the
// EOF, overlong and illegal-escape reports below.
let start_bpos = self.last_pos;
|
||||
// Number of hex digits consumed so far.
let mut count: uint = 0;
|
||||
// Numeric value of the digits consumed so far, most significant digit first.
let mut accum_int = 0;
|
||||
|
||||
// `count <= 6` (rather than `< 6`) deliberately lets a seventh digit through, so the
// `count > 6` check after the loop can report an overlong escape.
while !self.curr_is('}') && count <= 6 {
|
||||
let c = match self.curr {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
self.fatal_span_(start_bpos, self.last_pos,
|
||||
"unterminated unicode escape (found EOF)");
|
||||
}
|
||||
};
|
||||
// Shift the accumulated value one hex digit to the left, then add the new digit.
accum_int *= 16;
|
||||
// The closure runs only when `c` is not a hex digit; it distinguishes the literal's
// delimiter (missing `}`) from any other stray character.
accum_int += c.to_digit(16).unwrap_or_else(|| {
|
||||
if c == delim {
|
||||
self.fatal_span_(self.last_pos, self.pos,
|
||||
"unterminated unicode escape (needed a `}`)");
|
||||
} else {
|
||||
self.fatal_span_char(self.last_pos, self.pos,
|
||||
"illegal character in unicode escape", c);
|
||||
}
|
||||
}) as u32;
|
||||
self.bump();
|
||||
count += 1;
|
||||
}
|
||||
|
||||
// Only reachable with a seventh digit consumed, i.e. the loop stopped on the count bound.
if count > 6 {
|
||||
self.fatal_span_(start_bpos, self.last_pos,
|
||||
"overlong unicode escape (can have at most 6 hex digits)");
|
||||
}
|
||||
|
||||
self.bump(); // past the ending }
|
||||
|
||||
// `count >= 1` rejects an empty `\u{}`; the upper bound was already enforced above.
let mut valid = count >= 1 && count <= 6;
|
||||
// `None` means the accumulated number is not a valid `char`.
if char::from_u32(accum_int).is_none() {
|
||||
valid = false;
|
||||
}
|
||||
|
||||
if !valid {
|
||||
self.fatal_span_(start_bpos, self.last_pos, "illegal unicode character escape");
|
||||
}
|
||||
valid
|
||||
}
|
||||
|
||||
/// Scan over a float exponent.
|
||||
fn scan_float_exponent(&mut self) {
|
||||
if self.curr_is('e') || self.curr_is('E') {
|
||||
|
@ -1273,6 +1344,10 @@ impl<'a> StringReader<'a> {
|
|||
return token::Byte(id);
|
||||
}
|
||||
|
||||
/// Scan the hex digits of a `\x` escape.
///
/// Thin wrapper that asks `scan_hex_digits` for exactly 2 digits, passing `delim` and
/// `below_0x7f_only` through unchanged and returning its result.
fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
|
||||
self.scan_hex_digits(2, delim, below_0x7f_only)
|
||||
}
|
||||
|
||||
fn scan_byte_string(&mut self) -> token::Lit {
|
||||
self.bump();
|
||||
let start = self.last_pos;
|
||||
|
|
|
@ -393,16 +393,28 @@ pub fn char_lit(lit: &str) -> (char, int) {
|
|||
let msg = format!("lexer should have rejected a bad character escape {}", lit);
|
||||
let msg2 = msg.as_slice();
|
||||
|
||||
let esc: |uint| -> Option<(char, int)> = |len|
|
||||
fn esc(len: uint, lit: &str) -> Option<(char, int)> {
|
||||
num::from_str_radix(lit.slice(2, len), 16)
|
||||
.and_then(char::from_u32)
|
||||
.map(|x| (x, len as int));
|
||||
.map(|x| (x, len as int))
|
||||
}
|
||||
|
||||
let unicode_escape: || -> Option<(char, int)> = ||
|
||||
if lit.as_bytes()[2] == b'{' {
|
||||
let idx = lit.find('}').expect(msg2);
|
||||
let subslice = lit.slice(3, idx);
|
||||
num::from_str_radix(subslice, 16)
|
||||
.and_then(char::from_u32)
|
||||
.map(|x| (x, subslice.char_len() as int + 4))
|
||||
} else {
|
||||
esc(6, lit)
|
||||
};
|
||||
|
||||
// Unicode escapes
|
||||
return match lit.as_bytes()[1] as char {
|
||||
'x' | 'X' => esc(4),
|
||||
'u' => esc(6),
|
||||
'U' => esc(10),
|
||||
'x' | 'X' => esc(4, lit),
|
||||
'u' => unicode_escape(),
|
||||
'U' => esc(10, lit),
|
||||
_ => None,
|
||||
}.expect(msg2);
|
||||
}
|
||||
|
|
13
src/test/compile-fail/new-unicode-escapes-1.rs
Normal file
13
src/test/compile-fail/new-unicode-escapes-1.rs
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// A `\u{...}` escape that reaches the closing quote before any `}` must be rejected.
pub fn main() {
|
||||
let s = "\u{2603"; //~ ERROR unterminated unicode escape (needed a `}`)
|
||||
}
|
13
src/test/compile-fail/new-unicode-escapes-2.rs
Normal file
13
src/test/compile-fail/new-unicode-escapes-2.rs
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// A `\u{...}` escape with more than six hex digits must be rejected as overlong.
pub fn main() {
|
||||
let s = "\u{260311111111}"; //~ ERROR overlong unicode escape (can have at most 6 hex digits)
|
||||
}
|
13
src/test/compile-fail/new-unicode-escapes-3.rs
Normal file
13
src/test/compile-fail/new-unicode-escapes-3.rs
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// `d805` is well-formed hex, but the escape must still be rejected because the value is not
// a valid `char` (it lies in the surrogate range).
pub fn main() {
|
||||
let s = "\u{d805}"; //~ ERROR illegal unicode character escape
|
||||
}
|
13
src/test/compile-fail/new-unicode-escapes-4.rs
Normal file
13
src/test/compile-fail/new-unicode-escapes-4.rs
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// Characters between the braces that are not hex digits must be rejected.
pub fn main() {
|
||||
let s = "\u{lol}"; //~ ERROR illegal character in unicode escape
|
||||
}
|
22
src/test/run-pass/new-unicode-escapes.rs
Normal file
22
src/test/run-pass/new-unicode-escapes.rs
Normal file
|
@ -0,0 +1,22 @@
|
|||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// Run-pass checks for the new `\u{...}` escape syntax in string literals.
pub fn main() {
|
||||
// A single escape yields the corresponding character.
let s = "\u{2603}";
|
||||
assert_eq!(s, "☃");
|
||||
|
||||
// Several escapes in a row; hex digits are accepted in lower, upper and mixed case.
let s = "\u{2a10}\u{2A01}\u{2Aa0}";
|
||||
assert_eq!(s, "⨐⨁⪠");
|
||||
|
||||
// An escaped backslash followed by `{20}` stays literal text: the expected value is
// built from a lone backslash plus the characters `{20}`.
let s = "\\{20}";
|
||||
let mut correct_s = String::from_str("\\");
|
||||
correct_s.push_str("{20}");
|
||||
assert_eq!(s, correct_s.as_slice());
|
||||
}
|
Loading…
Add table
Reference in a new issue