Make non-ASCII errors more consistent.
There are three kinds of "byte" literals: byte literals, byte string literals, and raw byte string literals. None are allowed to have non-ASCII chars in them.

Two `EscapeError` variants exist for when that constraint is violated.
- `NonAsciiCharInByte`: used for byte literals and byte string literals.
- `NonAsciiCharInByteString`: used for raw byte string literals.

As a result, the messages for raw byte string literals use different wording, without good reason. Also, byte string literals are incorrectly described as "byte constants" in some error messages.

This commit eliminates `NonAsciiCharInByteString` so the three cases are handled similarly, and described correctly. The `mode` is enough to distinguish them.

Note: Some existing error messages mention "byte constants" and some mention "byte literals". I went with the latter here, because it's a more correct name, as used by the Reference.
This commit is contained in:
parent
34b32b0dac
commit
7dbf2c0ed8
15 changed files with 62 additions and 74 deletions
|
@ -52,10 +52,8 @@ pub enum EscapeError {
|
|||
|
||||
/// Unicode escape code in byte literal.
|
||||
UnicodeEscapeInByte,
|
||||
/// Non-ascii character in byte literal.
|
||||
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
|
||||
NonAsciiCharInByte,
|
||||
/// Non-ascii character in byte string literal.
|
||||
NonAsciiCharInByteString,
|
||||
|
||||
/// After a line ending with '\', the next line contains whitespace
|
||||
/// characters that are not skipped.
|
||||
|
@ -349,8 +347,7 @@ where
|
|||
let start = src.len() - chars.as_str().len() - c.len_utf8();
|
||||
let result = match c {
|
||||
'\r' => Err(EscapeError::BareCarriageReturnInRawString),
|
||||
c if is_byte && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString),
|
||||
c => Ok(c),
|
||||
_ => ascii_check(c, is_byte),
|
||||
};
|
||||
let end = src.len() - chars.as_str().len();
|
||||
callback(start..end, result);
|
||||
|
|
|
@ -289,9 +289,6 @@ fn test_unescape_raw_byte_str() {
|
|||
}
|
||||
|
||||
check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
|
||||
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]);
|
||||
check(
|
||||
"🦀a",
|
||||
&[(0..4, Err(EscapeError::NonAsciiCharInByteString)), (4..5, Ok(byte_from_char('a')))],
|
||||
);
|
||||
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]);
|
||||
check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(byte_from_char('a')))]);
|
||||
}
|
||||
|
|
|
@ -231,16 +231,23 @@ pub(crate) fn emit_unescape_error(
|
|||
.emit();
|
||||
}
|
||||
EscapeError::NonAsciiCharInByte => {
|
||||
assert!(mode.is_byte());
|
||||
let (c, span) = last_char();
|
||||
let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
|
||||
let desc = match mode {
|
||||
Mode::Byte => "byte literal",
|
||||
Mode::ByteStr => "byte string literal",
|
||||
Mode::RawByteStr => "raw byte string literal",
|
||||
_ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
|
||||
};
|
||||
let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc));
|
||||
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
|
||||
format!(" but is {:?}", c)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
|
||||
if (c as u32) <= 0xFF {
|
||||
err.span_label(span, &format!("must be ASCII{}", postfix));
|
||||
// Note: the \xHH suggestions are not given for raw byte string
|
||||
// literals, because they are raw and so cannot use any escapes.
|
||||
if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
|
||||
err.span_suggestion(
|
||||
span,
|
||||
&format!(
|
||||
|
@ -250,9 +257,9 @@ pub(crate) fn emit_unescape_error(
|
|||
format!("\\x{:X}", c as u32),
|
||||
Applicability::MaybeIncorrect,
|
||||
);
|
||||
} else if matches!(mode, Mode::Byte) {
|
||||
} else if mode == Mode::Byte {
|
||||
err.span_label(span, "this multibyte character does not fit into a single byte");
|
||||
} else if matches!(mode, Mode::ByteStr) {
|
||||
} else if mode != Mode::RawByteStr {
|
||||
let mut utf8 = String::new();
|
||||
utf8.push(c);
|
||||
err.span_suggestion(
|
||||
|
@ -270,19 +277,6 @@ pub(crate) fn emit_unescape_error(
|
|||
}
|
||||
err.emit();
|
||||
}
|
||||
EscapeError::NonAsciiCharInByteString => {
|
||||
assert!(mode.is_byte());
|
||||
let (c, span) = last_char();
|
||||
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
|
||||
format!(" but is {:?}", c)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
handler
|
||||
.struct_span_err(span, "raw byte string must be ASCII")
|
||||
.span_label(span, &format!("must be ASCII{}", postfix))
|
||||
.emit();
|
||||
}
|
||||
EscapeError::OutOfRangeHexEscape => {
|
||||
handler
|
||||
.struct_span_err(span, "out of range hex escape")
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#![feature(rustc_attrs)]
|
||||
|
||||
#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte constant
|
||||
#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte string literal
|
||||
fn main() {}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
error: non-ASCII character in byte constant
|
||||
error: non-ASCII character in byte string literal
|
||||
--> $DIR/key-value-non-ascii.rs:3:19
|
||||
|
|
||||
LL | #[rustc_dummy = b"ffi.rs"]
|
||||
| ^ byte constant must be ASCII
|
||||
| ^ must be ASCII
|
||||
|
|
||||
help: if you meant to use the UTF-8 encoding of 'ffi', use \xHH escapes
|
||||
|
|
||||
|
|
|
@ -7,6 +7,6 @@ pub fn main() {
|
|||
b'\x0Z'; //~ ERROR invalid character in numeric character escape: `Z`
|
||||
b' '; //~ ERROR byte constant must be escaped
|
||||
b'''; //~ ERROR byte constant must be escaped
|
||||
b'é'; //~ ERROR non-ASCII character in byte constant
|
||||
b'é'; //~ ERROR non-ASCII character in byte literal
|
||||
b'a //~ ERROR unterminated byte constant [E0763]
|
||||
}
|
||||
|
|
|
@ -32,11 +32,11 @@ error: byte constant must be escaped: `'`
|
|||
LL | b''';
|
||||
| ^ help: escape the character: `\'`
|
||||
|
||||
error: non-ASCII character in byte constant
|
||||
error: non-ASCII character in byte literal
|
||||
--> $DIR/byte-literals.rs:10:7
|
||||
|
|
||||
LL | b'é';
|
||||
| ^ byte constant must be ASCII
|
||||
| ^ must be ASCII
|
||||
|
|
||||
help: if you meant to use the unicode code point for 'é', use a \xHH escape
|
||||
|
|
||||
|
|
|
@ -3,7 +3,7 @@ static FOO: &'static [u8] = b"\f"; //~ ERROR unknown byte escape
|
|||
pub fn main() {
|
||||
b"\f"; //~ ERROR unknown byte escape
|
||||
b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z`
|
||||
b"é"; //~ ERROR non-ASCII character in byte constant
|
||||
br##"é"##; //~ ERROR raw byte string must be ASCII
|
||||
b"é"; //~ ERROR non-ASCII character in byte string literal
|
||||
br##"é"##; //~ ERROR non-ASCII character in raw byte string literal
|
||||
b"a //~ ERROR unterminated double quote byte string
|
||||
}
|
||||
|
|
|
@ -20,18 +20,18 @@ error: invalid character in numeric character escape: `Z`
|
|||
LL | b"\x0Z";
|
||||
| ^ invalid character in numeric character escape
|
||||
|
||||
error: non-ASCII character in byte constant
|
||||
error: non-ASCII character in byte string literal
|
||||
--> $DIR/byte-string-literals.rs:6:7
|
||||
|
|
||||
LL | b"é";
|
||||
| ^ byte constant must be ASCII
|
||||
| ^ must be ASCII
|
||||
|
|
||||
help: if you meant to use the unicode code point for 'é', use a \xHH escape
|
||||
|
|
||||
LL | b"\xE9";
|
||||
| ~~~~
|
||||
|
||||
error: raw byte string must be ASCII
|
||||
error: non-ASCII character in raw byte string literal
|
||||
--> $DIR/byte-string-literals.rs:7:10
|
||||
|
|
||||
LL | br##"é"##;
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
|
||||
pub fn main() {
|
||||
br"a
"; //~ ERROR bare CR not allowed in raw string
|
||||
br"é"; //~ ERROR raw byte string must be ASCII
|
||||
br"é"; //~ ERROR non-ASCII character in raw byte string literal
|
||||
br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ error: bare CR not allowed in raw string
|
|||
LL | br"a
";
|
||||
| ^
|
||||
|
||||
error: raw byte string must be ASCII
|
||||
error: non-ASCII character in raw byte string literal
|
||||
--> $DIR/raw-byte-string-literals.rs:5:8
|
||||
|
|
||||
LL | br"é";
|
||||
|
|
|
@ -14,15 +14,15 @@ fn main() {
|
|||
println!("{:?}", r##"/* } if isAdmin begin admins only "##);
|
||||
//~^ ERROR unicode codepoint changing visible direction of text present in literal
|
||||
println!("{:?}", b"/* } if isAdmin begin admins only ");
|
||||
//~^ ERROR non-ASCII character in byte constant
|
||||
//~| ERROR non-ASCII character in byte constant
|
||||
//~| ERROR non-ASCII character in byte constant
|
||||
//~| ERROR non-ASCII character in byte constant
|
||||
//~^ ERROR non-ASCII character in byte string literal
|
||||
//~| ERROR non-ASCII character in byte string literal
|
||||
//~| ERROR non-ASCII character in byte string literal
|
||||
//~| ERROR non-ASCII character in byte string literal
|
||||
println!("{:?}", br##"/* } if isAdmin begin admins only "##);
|
||||
//~^ ERROR raw byte string must be ASCII
|
||||
//~| ERROR raw byte string must be ASCII
|
||||
//~| ERROR raw byte string must be ASCII
|
||||
//~| ERROR raw byte string must be ASCII
|
||||
//~^ ERROR non-ASCII character in raw byte string literal
|
||||
//~| ERROR non-ASCII character in raw byte string literal
|
||||
//~| ERROR non-ASCII character in raw byte string literal
|
||||
//~| ERROR non-ASCII character in raw byte string literal
|
||||
println!("{:?}", '');
|
||||
//~^ ERROR unicode codepoint changing visible direction of text present in literal
|
||||
}
|
||||
|
|
|
@ -14,69 +14,69 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
|
|||
|
|
||||
= help: unicode escape sequences cannot be used as a byte or in a byte string
|
||||
|
||||
error: non-ASCII character in byte constant
|
||||
error: non-ASCII character in byte string literal
|
||||
--> $DIR/unicode-control-codepoints.rs:16:26
|
||||
|
|
||||
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
|
||||
| ^ byte constant must be ASCII but is '\u{202e}'
|
||||
| ^ must be ASCII but is '\u{202e}'
|
||||
|
|
||||
help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes
|
||||
|
|
||||
LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only ");
|
||||
| ~~~~~~~~~~~~
|
||||
|
||||
error: non-ASCII character in byte constant
|
||||
error: non-ASCII character in byte string literal
|
||||
--> $DIR/unicode-control-codepoints.rs:16:30
|
||||
|
|
||||
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
|
||||
| ^ byte constant must be ASCII but is '\u{2066}'
|
||||
| ^ must be ASCII but is '\u{2066}'
|
||||
|
|
||||
help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes
|
||||
|
|
||||
LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only ");
|
||||
| ~~~~~~~~~~~~
|
||||
|
||||
error: non-ASCII character in byte constant
|
||||
error: non-ASCII character in byte string literal
|
||||
--> $DIR/unicode-control-codepoints.rs:16:41
|
||||
|
|
||||
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
|
||||
| ^ byte constant must be ASCII but is '\u{2069}'
|
||||
| ^ must be ASCII but is '\u{2069}'
|
||||
|
|
||||
help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes
|
||||
|
|
||||
LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only ");
|
||||
| ~~~~~~~~~~~~
|
||||
|
||||
error: non-ASCII character in byte constant
|
||||
error: non-ASCII character in byte string literal
|
||||
--> $DIR/unicode-control-codepoints.rs:16:43
|
||||
|
|
||||
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
|
||||
| ^ byte constant must be ASCII but is '\u{2066}'
|
||||
| ^ must be ASCII but is '\u{2066}'
|
||||
|
|
||||
help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes
|
||||
|
|
||||
LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only ");
|
||||
| ~~~~~~~~~~~~
|
||||
|
||||
error: raw byte string must be ASCII
|
||||
error: non-ASCII character in raw byte string literal
|
||||
--> $DIR/unicode-control-codepoints.rs:21:29
|
||||
|
|
||||
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
|
||||
| ^ must be ASCII but is '\u{202e}'
|
||||
|
||||
error: raw byte string must be ASCII
|
||||
error: non-ASCII character in raw byte string literal
|
||||
--> $DIR/unicode-control-codepoints.rs:21:33
|
||||
|
|
||||
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
|
||||
| ^ must be ASCII but is '\u{2066}'
|
||||
|
||||
error: raw byte string must be ASCII
|
||||
error: non-ASCII character in raw byte string literal
|
||||
--> $DIR/unicode-control-codepoints.rs:21:44
|
||||
|
|
||||
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
|
||||
| ^ must be ASCII but is '\u{2069}'
|
||||
|
||||
error: raw byte string must be ASCII
|
||||
error: non-ASCII character in raw byte string literal
|
||||
--> $DIR/unicode-control-codepoints.rs:21:46
|
||||
|
|
||||
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
|
||||
|
|
|
@ -2,17 +2,17 @@
|
|||
|
||||
fn main() {
|
||||
b'µ';
|
||||
//~^ ERROR: non-ASCII character in byte constant
|
||||
//~^ ERROR: non-ASCII character in byte literal
|
||||
//~| HELP: if you meant to use the unicode code point for 'µ', use a \xHH escape
|
||||
//~| NOTE: byte constant must be ASCII
|
||||
//~| NOTE: must be ASCII
|
||||
|
||||
b'字';
|
||||
//~^ ERROR: non-ASCII character in byte constant
|
||||
//~^ ERROR: non-ASCII character in byte literal
|
||||
//~| NOTE: this multibyte character does not fit into a single byte
|
||||
//~| NOTE: byte constant must be ASCII
|
||||
//~| NOTE: must be ASCII
|
||||
|
||||
b"字";
|
||||
//~^ ERROR: non-ASCII character in byte constant
|
||||
//~^ ERROR: non-ASCII character in byte string literal
|
||||
//~| HELP: if you meant to use the UTF-8 encoding of '字', use \xHH escapes
|
||||
//~| NOTE: byte constant must be ASCII
|
||||
//~| NOTE: must be ASCII
|
||||
}
|
||||
|
|
|
@ -1,28 +1,28 @@
|
|||
error: non-ASCII character in byte constant
|
||||
error: non-ASCII character in byte literal
|
||||
--> $DIR/multibyte-escapes.rs:4:7
|
||||
|
|
||||
LL | b'µ';
|
||||
| ^ byte constant must be ASCII
|
||||
| ^ must be ASCII
|
||||
|
|
||||
help: if you meant to use the unicode code point for 'µ', use a \xHH escape
|
||||
|
|
||||
LL | b'\xB5';
|
||||
| ~~~~
|
||||
|
||||
error: non-ASCII character in byte constant
|
||||
error: non-ASCII character in byte literal
|
||||
--> $DIR/multibyte-escapes.rs:9:7
|
||||
|
|
||||
LL | b'字';
|
||||
| ^^
|
||||
| |
|
||||
| byte constant must be ASCII
|
||||
| must be ASCII
|
||||
| this multibyte character does not fit into a single byte
|
||||
|
||||
error: non-ASCII character in byte constant
|
||||
error: non-ASCII character in byte string literal
|
||||
--> $DIR/multibyte-escapes.rs:14:7
|
||||
|
|
||||
LL | b"字";
|
||||
| ^^ byte constant must be ASCII
|
||||
| ^^ must be ASCII
|
||||
|
|
||||
help: if you meant to use the UTF-8 encoding of '字', use \xHH escapes
|
||||
|
|
||||
|
|
Loading…
Add table
Reference in a new issue