Update lexer emoji diagnostics to Unicode 15.0
This commit is contained in:
parent
04abc370b9
commit
bca79a26d8
6 changed files with 36 additions and 76 deletions
49
Cargo.lock
49
Cargo.lock
|
@ -3785,7 +3785,7 @@ name = "rustc_lexer"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"expect-test",
|
"expect-test",
|
||||||
"unic-emoji-char",
|
"unicode-properties",
|
||||||
"unicode-xid",
|
"unicode-xid",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -5445,38 +5445,6 @@ dependencies = [
|
||||||
"tempfile",
|
"tempfile",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unic-char-property"
|
|
||||||
version = "0.9.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221"
|
|
||||||
dependencies = [
|
|
||||||
"unic-char-range",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unic-char-range"
|
|
||||||
version = "0.9.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unic-common"
|
|
||||||
version = "0.9.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unic-emoji-char"
|
|
||||||
version = "0.9.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d"
|
|
||||||
dependencies = [
|
|
||||||
"unic-char-property",
|
|
||||||
"unic-char-range",
|
|
||||||
"unic-ucd-version",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unic-langid"
|
name = "unic-langid"
|
||||||
version = "0.9.1"
|
version = "0.9.1"
|
||||||
|
@ -5520,15 +5488,6 @@ dependencies = [
|
||||||
"unic-langid-impl",
|
"unic-langid-impl",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unic-ucd-version"
|
|
||||||
version = "0.9.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4"
|
|
||||||
dependencies = [
|
|
||||||
"unic-common",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicase"
|
name = "unicase"
|
||||||
version = "2.6.0"
|
version = "2.6.0"
|
||||||
|
@ -5566,6 +5525,12 @@ dependencies = [
|
||||||
"tinyvec",
|
"tinyvec",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-properties"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c7f91c8b21fbbaa18853c3d0801c78f4fc94cdb976699bb03e832e75f7fd22f0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-script"
|
name = "unicode-script"
|
||||||
version = "0.5.5"
|
version = "0.5.5"
|
||||||
|
|
|
@ -16,7 +16,11 @@ Rust lexer used by rustc. No stability guarantees are provided.
|
||||||
# Note that this crate purposefully does not depend on other rustc crates
|
# Note that this crate purposefully does not depend on other rustc crates
|
||||||
[dependencies]
|
[dependencies]
|
||||||
unicode-xid = "0.2.0"
|
unicode-xid = "0.2.0"
|
||||||
unic-emoji-char = "0.9.0"
|
|
||||||
|
[dependencies.unicode-properties]
|
||||||
|
version = "0.1.0"
|
||||||
|
default-features = false
|
||||||
|
features = ["emoji"]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
expect-test = "1.4.0"
|
expect-test = "1.4.0"
|
||||||
|
|
|
@ -34,6 +34,7 @@ pub use crate::cursor::Cursor;
|
||||||
use self::LiteralKind::*;
|
use self::LiteralKind::*;
|
||||||
use self::TokenKind::*;
|
use self::TokenKind::*;
|
||||||
use crate::cursor::EOF_CHAR;
|
use crate::cursor::EOF_CHAR;
|
||||||
|
use unicode_properties::UnicodeEmoji;
|
||||||
|
|
||||||
/// Parsed token.
|
/// Parsed token.
|
||||||
/// It doesn't contain information about data that has been parsed,
|
/// It doesn't contain information about data that has been parsed,
|
||||||
|
@ -428,9 +429,7 @@ impl Cursor<'_> {
|
||||||
Literal { kind, suffix_start }
|
Literal { kind, suffix_start }
|
||||||
}
|
}
|
||||||
// Identifier starting with an emoji. Only lexed for graceful error recovery.
|
// Identifier starting with an emoji. Only lexed for graceful error recovery.
|
||||||
c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => {
|
c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(),
|
||||||
self.fake_ident_or_unknown_prefix()
|
|
||||||
}
|
|
||||||
_ => Unknown,
|
_ => Unknown,
|
||||||
};
|
};
|
||||||
let res = Token::new(token_kind, self.pos_within_token());
|
let res = Token::new(token_kind, self.pos_within_token());
|
||||||
|
@ -514,9 +513,7 @@ impl Cursor<'_> {
|
||||||
// we see a prefix here, it is definitely an unknown prefix.
|
// we see a prefix here, it is definitely an unknown prefix.
|
||||||
match self.first() {
|
match self.first() {
|
||||||
'#' | '"' | '\'' => UnknownPrefix,
|
'#' | '"' | '\'' => UnknownPrefix,
|
||||||
c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => {
|
c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(),
|
||||||
self.fake_ident_or_unknown_prefix()
|
|
||||||
}
|
|
||||||
_ => Ident,
|
_ => Ident,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -525,7 +522,7 @@ impl Cursor<'_> {
|
||||||
// Start is already eaten, eat the rest of identifier.
|
// Start is already eaten, eat the rest of identifier.
|
||||||
self.eat_while(|c| {
|
self.eat_while(|c| {
|
||||||
unicode_xid::UnicodeXID::is_xid_continue(c)
|
unicode_xid::UnicodeXID::is_xid_continue(c)
|
||||||
|| (!c.is_ascii() && unic_emoji_char::is_emoji(c))
|
|| (!c.is_ascii() && c.is_emoji_char())
|
||||||
|| c == '\u{200d}'
|
|| c == '\u{200d}'
|
||||||
});
|
});
|
||||||
// Known prefixes must have been handled earlier. So if
|
// Known prefixes must have been handled earlier. So if
|
||||||
|
|
|
@ -270,18 +270,14 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
|
||||||
"twox-hash",
|
"twox-hash",
|
||||||
"type-map",
|
"type-map",
|
||||||
"typenum",
|
"typenum",
|
||||||
"unic-char-property",
|
|
||||||
"unic-char-range",
|
|
||||||
"unic-common",
|
|
||||||
"unic-emoji-char",
|
|
||||||
"unic-langid",
|
"unic-langid",
|
||||||
"unic-langid-impl",
|
"unic-langid-impl",
|
||||||
"unic-langid-macros",
|
"unic-langid-macros",
|
||||||
"unic-langid-macros-impl",
|
"unic-langid-macros-impl",
|
||||||
"unic-ucd-version",
|
|
||||||
"unicase",
|
"unicase",
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
"unicode-normalization",
|
"unicode-normalization",
|
||||||
|
"unicode-properties",
|
||||||
"unicode-script",
|
"unicode-script",
|
||||||
"unicode-security",
|
"unicode-security",
|
||||||
"unicode-width",
|
"unicode-width",
|
||||||
|
|
|
@ -1,9 +1,7 @@
|
||||||
fn invalid_emoji_usages() {
|
fn invalid_emoji_usages() {
|
||||||
let arrow↔️ = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
|
let arrow↔️ = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
|
||||||
// FIXME
|
let planet🪐 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
|
||||||
let planet🪐 = "basic emoji"; //~ ERROR: unknown start of token
|
let wireless🛜 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
|
||||||
// FIXME
|
|
||||||
let wireless🛜 = "basic emoji"; //~ ERROR: unknown start of token
|
|
||||||
// FIXME
|
// FIXME
|
||||||
let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
|
let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
|
||||||
//~^ WARN: identifier contains uncommon Unicode codepoints
|
//~^ WARN: identifier contains uncommon Unicode codepoints
|
||||||
|
|
|
@ -1,17 +1,5 @@
|
||||||
error: unknown start of token: \u{1fa90}
|
|
||||||
--> $DIR/lex-emoji-identifiers.rs:4:15
|
|
||||||
|
|
|
||||||
LL | let planet🪐 = "basic emoji";
|
|
||||||
| ^^
|
|
||||||
|
|
||||||
error: unknown start of token: \u{1f6dc}
|
|
||||||
--> $DIR/lex-emoji-identifiers.rs:6:17
|
|
||||||
|
|
|
||||||
LL | let wireless🛜 = "basic emoji";
|
|
||||||
| ^^
|
|
||||||
|
|
||||||
error: unknown start of token: \u{20e3}
|
error: unknown start of token: \u{20e3}
|
||||||
--> $DIR/lex-emoji-identifiers.rs:8:14
|
--> $DIR/lex-emoji-identifiers.rs:6:14
|
||||||
|
|
|
|
||||||
LL | let key1️⃣ = "keycap sequence";
|
LL | let key1️⃣ = "keycap sequence";
|
||||||
| ^
|
| ^
|
||||||
|
@ -22,26 +10,38 @@ error: identifiers cannot contain emoji: `arrow↔️`
|
||||||
LL | let arrow↔️ = "basic emoji";
|
LL | let arrow↔️ = "basic emoji";
|
||||||
| ^^^^^^
|
| ^^^^^^
|
||||||
|
|
||||||
|
error: identifiers cannot contain emoji: `planet🪐`
|
||||||
|
--> $DIR/lex-emoji-identifiers.rs:3:9
|
||||||
|
|
|
||||||
|
LL | let planet🪐 = "basic emoji";
|
||||||
|
| ^^^^^^^^
|
||||||
|
|
||||||
|
error: identifiers cannot contain emoji: `wireless🛜`
|
||||||
|
--> $DIR/lex-emoji-identifiers.rs:4:9
|
||||||
|
|
|
||||||
|
LL | let wireless🛜 = "basic emoji";
|
||||||
|
| ^^^^^^^^^^
|
||||||
|
|
||||||
error: identifiers cannot contain emoji: `flag🇺🇳`
|
error: identifiers cannot contain emoji: `flag🇺🇳`
|
||||||
--> $DIR/lex-emoji-identifiers.rs:10:9
|
--> $DIR/lex-emoji-identifiers.rs:8:9
|
||||||
|
|
|
|
||||||
LL | let flag🇺🇳 = "flag sequence";
|
LL | let flag🇺🇳 = "flag sequence";
|
||||||
| ^^^^^^
|
| ^^^^^^
|
||||||
|
|
||||||
error: identifiers cannot contain emoji: `wales🏴`
|
error: identifiers cannot contain emoji: `wales🏴`
|
||||||
--> $DIR/lex-emoji-identifiers.rs:11:9
|
--> $DIR/lex-emoji-identifiers.rs:9:9
|
||||||
|
|
|
|
||||||
LL | let wales🏴 = "tag sequence";
|
LL | let wales🏴 = "tag sequence";
|
||||||
| ^^^^^^^
|
| ^^^^^^^
|
||||||
|
|
||||||
error: identifiers cannot contain emoji: `folded🙏🏿`
|
error: identifiers cannot contain emoji: `folded🙏🏿`
|
||||||
--> $DIR/lex-emoji-identifiers.rs:12:9
|
--> $DIR/lex-emoji-identifiers.rs:10:9
|
||||||
|
|
|
|
||||||
LL | let folded🙏🏿 = "modifier sequence";
|
LL | let folded🙏🏿 = "modifier sequence";
|
||||||
| ^^^^^^^^^^
|
| ^^^^^^^^^^
|
||||||
|
|
||||||
warning: identifier contains uncommon Unicode codepoints
|
warning: identifier contains uncommon Unicode codepoints
|
||||||
--> $DIR/lex-emoji-identifiers.rs:8:9
|
--> $DIR/lex-emoji-identifiers.rs:6:9
|
||||||
|
|
|
|
||||||
LL | let key1️⃣ = "keycap sequence";
|
LL | let key1️⃣ = "keycap sequence";
|
||||||
| ^^^^
|
| ^^^^
|
||||||
|
|
Loading…
Add table
Reference in a new issue