Rollup merge of #108031 - jieyouxu:issue-108019, r=estebank
Don't recover lifetimes/labels containing emojis as character literals. Fixes #108019. Note that at the time of this commit, `unic-emoji-char` seems to have data tables only up to Unicode 5.0, while the Unicode standard itself has since moved on to newer versions. As a result, a newer emoji such as `🥺` will not be recognized as an emoji, but older emojis such as `🐱` will. This PR leaves a couple of FIXMEs where `unic_emoji_char::is_emoji` is used.
This commit is contained in:
commit
3035ccbcb9
8 changed files with 178 additions and 17 deletions
|
@ -471,6 +471,8 @@ pub enum StashKey {
|
||||||
/// When an invalid lifetime e.g. `'2` should be reinterpreted
|
/// When an invalid lifetime e.g. `'2` should be reinterpreted
|
||||||
/// as a char literal in the parser
|
/// as a char literal in the parser
|
||||||
LifetimeIsChar,
|
LifetimeIsChar,
|
||||||
|
/// When an invalid lifetime e.g. `'🐱` contains emoji.
|
||||||
|
LifetimeContainsEmoji,
|
||||||
/// Maybe there was a typo where a comma was forgotten before
|
/// Maybe there was a typo where a comma was forgotten before
|
||||||
/// FRU syntax
|
/// FRU syntax
|
||||||
MaybeFruTypo,
|
MaybeFruTypo,
|
||||||
|
|
|
@ -95,7 +95,7 @@ pub enum TokenKind {
|
||||||
Literal { kind: LiteralKind, suffix_start: u32 },
|
Literal { kind: LiteralKind, suffix_start: u32 },
|
||||||
|
|
||||||
/// "'a"
|
/// "'a"
|
||||||
Lifetime { starts_with_number: bool },
|
Lifetime { starts_with_number: bool, contains_emoji: bool },
|
||||||
|
|
||||||
// One-char tokens:
|
// One-char tokens:
|
||||||
/// ";"
|
/// ";"
|
||||||
|
@ -630,7 +630,13 @@ impl Cursor<'_> {
|
||||||
// If the first symbol is valid for identifier, it can be a lifetime.
|
// If the first symbol is valid for identifier, it can be a lifetime.
|
||||||
// Also check if it's a number for a better error reporting (so '0 will
|
// Also check if it's a number for a better error reporting (so '0 will
|
||||||
// be reported as invalid lifetime and not as unterminated char literal).
|
// be reported as invalid lifetime and not as unterminated char literal).
|
||||||
is_id_start(self.first()) || self.first().is_digit(10)
|
// We also have to account for potential `'🐱` emojis to avoid reporting
|
||||||
|
// it as an unterminated char literal.
|
||||||
|
is_id_start(self.first())
|
||||||
|
|| self.first().is_digit(10)
|
||||||
|
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
|
||||||
|
// 5.0, but Unicode is already newer than this.
|
||||||
|
|| unic_emoji_char::is_emoji(self.first())
|
||||||
};
|
};
|
||||||
|
|
||||||
if !can_be_a_lifetime {
|
if !can_be_a_lifetime {
|
||||||
|
@ -643,16 +649,33 @@ impl Cursor<'_> {
|
||||||
return Literal { kind, suffix_start };
|
return Literal { kind, suffix_start };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Either a lifetime or a character literal with
|
// Either a lifetime or a character literal.
|
||||||
// length greater than 1.
|
|
||||||
|
|
||||||
let starts_with_number = self.first().is_digit(10);
|
let starts_with_number = self.first().is_digit(10);
|
||||||
|
let mut contains_emoji = false;
|
||||||
|
|
||||||
// Skip the literal contents.
|
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
|
||||||
// First symbol can be a number (which isn't a valid identifier start),
|
// 5.0, but Unicode is already newer than this.
|
||||||
// so skip it without any checks.
|
if unic_emoji_char::is_emoji(self.first()) {
|
||||||
self.bump();
|
contains_emoji = true;
|
||||||
self.eat_while(is_id_continue);
|
} else {
|
||||||
|
// Skip the literal contents.
|
||||||
|
// First symbol can be a number (which isn't a valid identifier start),
|
||||||
|
// so skip it without any checks.
|
||||||
|
self.bump();
|
||||||
|
}
|
||||||
|
self.eat_while(|c| {
|
||||||
|
if is_id_continue(c) {
|
||||||
|
true
|
||||||
|
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
|
||||||
|
// 5.0, but Unicode is already newer than this.
|
||||||
|
} else if unic_emoji_char::is_emoji(c) {
|
||||||
|
contains_emoji = true;
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Check if after skipping literal contents we've met a closing
|
// Check if after skipping literal contents we've met a closing
|
||||||
// single quote (which means that user attempted to create a
|
// single quote (which means that user attempted to create a
|
||||||
|
@ -662,7 +685,7 @@ impl Cursor<'_> {
|
||||||
let kind = Char { terminated: true };
|
let kind = Char { terminated: true };
|
||||||
Literal { kind, suffix_start: self.pos_within_token() }
|
Literal { kind, suffix_start: self.pos_within_token() }
|
||||||
} else {
|
} else {
|
||||||
Lifetime { starts_with_number }
|
Lifetime { starts_with_number, contains_emoji }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -235,7 +235,7 @@ fn lifetime() {
|
||||||
check_lexing(
|
check_lexing(
|
||||||
"'abc",
|
"'abc",
|
||||||
expect![[r#"
|
expect![[r#"
|
||||||
Token { kind: Lifetime { starts_with_number: false }, len: 4 }
|
Token { kind: Lifetime { starts_with_number: false, contains_emoji: false }, len: 4 }
|
||||||
"#]],
|
"#]],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -200,16 +200,21 @@ impl<'a> StringReader<'a> {
|
||||||
};
|
};
|
||||||
token::Literal(token::Lit { kind, symbol, suffix })
|
token::Literal(token::Lit { kind, symbol, suffix })
|
||||||
}
|
}
|
||||||
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
|
rustc_lexer::TokenKind::Lifetime { starts_with_number, contains_emoji } => {
|
||||||
// Include the leading `'` in the real identifier, for macro
|
// Include the leading `'` in the real identifier, for macro
|
||||||
// expansion purposes. See #12512 for the gory details of why
|
// expansion purposes. See #12512 for the gory details of why
|
||||||
// this is necessary.
|
// this is necessary.
|
||||||
let lifetime_name = self.str_from(start);
|
let lifetime_name = self.str_from(start);
|
||||||
if starts_with_number {
|
if starts_with_number {
|
||||||
let span = self.mk_sp(start, self.pos);
|
let span = self.mk_sp(start, self.pos);
|
||||||
let mut diag = self.sess.struct_err("lifetimes cannot start with a number");
|
let mut diag = self.sess.struct_err("lifetimes or labels cannot start with a number");
|
||||||
diag.set_span(span);
|
diag.set_span(span);
|
||||||
diag.stash(span, StashKey::LifetimeIsChar);
|
diag.stash(span, StashKey::LifetimeIsChar);
|
||||||
|
} else if contains_emoji {
|
||||||
|
let span = self.mk_sp(start, self.pos);
|
||||||
|
let mut diag = self.sess.struct_err("lifetimes or labels cannot contain emojis");
|
||||||
|
diag.set_span(span);
|
||||||
|
diag.stash(span, StashKey::LifetimeContainsEmoji);
|
||||||
}
|
}
|
||||||
let ident = Symbol::intern(lifetime_name);
|
let ident = Symbol::intern(lifetime_name);
|
||||||
token::Lifetime(ident)
|
token::Lifetime(ident)
|
||||||
|
|
45
tests/ui/lexer/issue-108019-bad-emoji-recovery.rs
Normal file
45
tests/ui/lexer/issue-108019-bad-emoji-recovery.rs
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
#![allow(unused_labels)]
|
||||||
|
|
||||||
|
// FIXME(#108019): outdated Unicode table
|
||||||
|
// fn foo() {
|
||||||
|
// '🥺 loop {
|
||||||
|
// break
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
fn bar() {
|
||||||
|
'🐱 loop {
|
||||||
|
//~^ ERROR labeled expression must be followed by `:`
|
||||||
|
//~| ERROR lifetimes or labels cannot contain emojis
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn qux() {
|
||||||
|
'a🐱 loop {
|
||||||
|
//~^ ERROR labeled expression must be followed by `:`
|
||||||
|
//~| ERROR lifetimes or labels cannot contain emojis
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn quux() {
|
||||||
|
'1🐱 loop {
|
||||||
|
//~^ ERROR labeled expression must be followed by `:`
|
||||||
|
//~| ERROR lifetimes or labels cannot start with a number
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn x<'🐱>() -> &'🐱 () {
|
||||||
|
//~^ ERROR lifetimes or labels cannot contain emojis
|
||||||
|
//~| ERROR lifetimes or labels cannot contain emojis
|
||||||
|
&()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn y() {
|
||||||
|
'a🐱: loop {}
|
||||||
|
//~^ ERROR lifetimes or labels cannot contain emojis
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {}
|
86
tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr
Normal file
86
tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
error: labeled expression must be followed by `:`
|
||||||
|
--> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
|
||||||
|
|
|
||||||
|
LL | '🐱 loop {
|
||||||
|
| ^--- help: add `:` after the label
|
||||||
|
| |
|
||||||
|
| _____the label
|
||||||
|
| |
|
||||||
|
LL | |
|
||||||
|
LL | |
|
||||||
|
LL | | break
|
||||||
|
LL | | }
|
||||||
|
| |_____^
|
||||||
|
|
|
||||||
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
|
||||||
|
|
||||||
|
error: labeled expression must be followed by `:`
|
||||||
|
--> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
|
||||||
|
|
|
||||||
|
LL | 'a🐱 loop {
|
||||||
|
| ^---- help: add `:` after the label
|
||||||
|
| |
|
||||||
|
| _____the label
|
||||||
|
| |
|
||||||
|
LL | |
|
||||||
|
LL | |
|
||||||
|
LL | | break
|
||||||
|
LL | | }
|
||||||
|
| |_____^
|
||||||
|
|
|
||||||
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
|
||||||
|
|
||||||
|
error: labeled expression must be followed by `:`
|
||||||
|
--> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
|
||||||
|
|
|
||||||
|
LL | '1🐱 loop {
|
||||||
|
| ^---- help: add `:` after the label
|
||||||
|
| |
|
||||||
|
| _____the label
|
||||||
|
| |
|
||||||
|
LL | |
|
||||||
|
LL | |
|
||||||
|
LL | | break
|
||||||
|
LL | | }
|
||||||
|
| |_____^
|
||||||
|
|
|
||||||
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
|
||||||
|
|
||||||
|
error: lifetimes or labels cannot contain emojis
|
||||||
|
--> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
|
||||||
|
|
|
||||||
|
LL | '🐱 loop {
|
||||||
|
| ^^^
|
||||||
|
|
||||||
|
error: lifetimes or labels cannot contain emojis
|
||||||
|
--> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
|
||||||
|
|
|
||||||
|
LL | 'a🐱 loop {
|
||||||
|
| ^^^^
|
||||||
|
|
||||||
|
error: lifetimes or labels cannot start with a number
|
||||||
|
--> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
|
||||||
|
|
|
||||||
|
LL | '1🐱 loop {
|
||||||
|
| ^^^^
|
||||||
|
|
||||||
|
error: lifetimes or labels cannot contain emojis
|
||||||
|
--> $DIR/issue-108019-bad-emoji-recovery.rs:34:6
|
||||||
|
|
|
||||||
|
LL | fn x<'🐱>() -> &'🐱 () {
|
||||||
|
| ^^^
|
||||||
|
|
||||||
|
error: lifetimes or labels cannot contain emojis
|
||||||
|
--> $DIR/issue-108019-bad-emoji-recovery.rs:34:16
|
||||||
|
|
|
||||||
|
LL | fn x<'🐱>() -> &'🐱 () {
|
||||||
|
| ^^^
|
||||||
|
|
||||||
|
error: lifetimes or labels cannot contain emojis
|
||||||
|
--> $DIR/issue-108019-bad-emoji-recovery.rs:41:5
|
||||||
|
|
|
||||||
|
LL | 'a🐱: loop {}
|
||||||
|
| ^^^^
|
||||||
|
|
||||||
|
error: aborting due to 9 previous errors
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
struct S<'1> { s: &'1 usize }
|
struct S<'1> { s: &'1 usize }
|
||||||
//~^ ERROR lifetimes cannot start with a number
|
//~^ ERROR lifetimes or labels cannot start with a number
|
||||||
//~| ERROR lifetimes cannot start with a number
|
//~| ERROR lifetimes or labels cannot start with a number
|
||||||
fn main() {
|
fn main() {
|
||||||
// verify that the parse error doesn't stop type checking
|
// verify that the parse error doesn't stop type checking
|
||||||
let x: usize = "";
|
let x: usize = "";
|
||||||
|
|
|
@ -6,13 +6,13 @@ LL | let x: usize = "";
|
||||||
| |
|
| |
|
||||||
| expected due to this
|
| expected due to this
|
||||||
|
|
||||||
error: lifetimes cannot start with a number
|
error: lifetimes or labels cannot start with a number
|
||||||
--> $DIR/numeric-lifetime.rs:1:10
|
--> $DIR/numeric-lifetime.rs:1:10
|
||||||
|
|
|
|
||||||
LL | struct S<'1> { s: &'1 usize }
|
LL | struct S<'1> { s: &'1 usize }
|
||||||
| ^^
|
| ^^
|
||||||
|
|
||||||
error: lifetimes cannot start with a number
|
error: lifetimes or labels cannot start with a number
|
||||||
--> $DIR/numeric-lifetime.rs:1:20
|
--> $DIR/numeric-lifetime.rs:1:20
|
||||||
|
|
|
|
||||||
LL | struct S<'1> { s: &'1 usize }
|
LL | struct S<'1> { s: &'1 usize }
|
||||||
|
|
Loading…
Add table
Reference in a new issue