Properly handle emojis as literal prefix in macros
Do not accept the following:

```rust
macro_rules! lexes {($($_:tt)*) => {}}
lexes!(🐛"foo");
```

Before, invalid emoji identifiers were gated during parsing instead of lexing in all cases, but this didn't account for macro expansion of literal prefixes.

Fix #123696.
This commit is contained in:
parent
e78913baef
commit
19821ad234
6 changed files with 36 additions and 6 deletions
|
@ -88,6 +88,10 @@ pub enum TokenKind {
|
||||||
/// tokens.
|
/// tokens.
|
||||||
UnknownPrefix,
|
UnknownPrefix,
|
||||||
|
|
||||||
|
/// Similar to the above, but *always* an error on every edition. This is used
|
||||||
|
/// for emoji identifier recovery, as those are not meant to be ever accepted.
|
||||||
|
InvalidPrefix,
|
||||||
|
|
||||||
/// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
|
/// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
|
||||||
/// suffix, but may be present here on string and float literals. Users of
|
/// suffix, but may be present here on string and float literals. Users of
|
||||||
/// this type will need to check for and reject that case.
|
/// this type will need to check for and reject that case.
|
||||||
|
@ -528,7 +532,7 @@ impl Cursor<'_> {
|
||||||
// Known prefixes must have been handled earlier. So if
|
// Known prefixes must have been handled earlier. So if
|
||||||
// we see a prefix here, it is definitely an unknown prefix.
|
// we see a prefix here, it is definitely an unknown prefix.
|
||||||
match self.first() {
|
match self.first() {
|
||||||
'#' | '"' | '\'' => UnknownPrefix,
|
'#' | '"' | '\'' => InvalidPrefix,
|
||||||
_ => InvalidIdent,
|
_ => InvalidIdent,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -205,6 +205,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
|
||||||
self.ident(start)
|
self.ident(start)
|
||||||
}
|
}
|
||||||
rustc_lexer::TokenKind::InvalidIdent
|
rustc_lexer::TokenKind::InvalidIdent
|
||||||
|
| rustc_lexer::TokenKind::InvalidPrefix
|
||||||
// Do not recover an identifier with emoji if the codepoint is a confusable
|
// Do not recover an identifier with emoji if the codepoint is a confusable
|
||||||
// with a recoverable substitution token, like `➖`.
|
// with a recoverable substitution token, like `➖`.
|
||||||
if !UNICODE_ARRAY
|
if !UNICODE_ARRAY
|
||||||
|
@ -302,7 +303,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
|
||||||
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
|
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
|
||||||
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
|
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
|
||||||
|
|
||||||
rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
|
rustc_lexer::TokenKind::Unknown
|
||||||
|
| rustc_lexer::TokenKind::InvalidIdent
|
||||||
|
| rustc_lexer::TokenKind::InvalidPrefix => {
|
||||||
// Don't emit diagnostics for sequences of the same invalid token
|
// Don't emit diagnostics for sequences of the same invalid token
|
||||||
if swallow_next_invalid > 0 {
|
if swallow_next_invalid > 0 {
|
||||||
swallow_next_invalid -= 1;
|
swallow_next_invalid -= 1;
|
||||||
|
|
|
@ -876,9 +876,10 @@ impl<'src> Classifier<'src> {
|
||||||
},
|
},
|
||||||
Some(c) => c,
|
Some(c) => c,
|
||||||
},
|
},
|
||||||
TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
|
TokenKind::RawIdent
|
||||||
Class::Ident(self.new_span(before, text))
|
| TokenKind::UnknownPrefix
|
||||||
}
|
| TokenKind::InvalidPrefix
|
||||||
|
| TokenKind::InvalidIdent => Class::Ident(self.new_span(before, text)),
|
||||||
TokenKind::Lifetime { .. } => Class::Lifetime,
|
TokenKind::Lifetime { .. } => Class::Lifetime,
|
||||||
TokenKind::Eof => panic!("Eof in advance"),
|
TokenKind::Eof => panic!("Eof in advance"),
|
||||||
};
|
};
|
||||||
|
|
|
@ -178,7 +178,7 @@ impl<'a> Converter<'a> {
|
||||||
rustc_lexer::TokenKind::Ident => {
|
rustc_lexer::TokenKind::Ident => {
|
||||||
SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
|
SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
|
||||||
}
|
}
|
||||||
rustc_lexer::TokenKind::InvalidIdent => {
|
rustc_lexer::TokenKind::InvalidPrefix | rustc_lexer::TokenKind::InvalidIdent => {
|
||||||
err = "Ident contains invalid characters";
|
err = "Ident contains invalid characters";
|
||||||
IDENT
|
IDENT
|
||||||
}
|
}
|
||||||
|
|
8
tests/ui/lexer/emoji-literal-prefix.rs
Normal file
8
tests/ui/lexer/emoji-literal-prefix.rs
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
macro_rules! lexes {($($_:tt)*) => {}}
|
||||||
|
|
||||||
|
lexes!(🐛#); //~ ERROR identifiers cannot contain emoji
|
||||||
|
lexes!(🐛"foo");
|
||||||
|
lexes!(🐛'q');
|
||||||
|
lexes!(🐛'q);
|
||||||
|
|
||||||
|
fn main() {}
|
14
tests/ui/lexer/emoji-literal-prefix.stderr
Normal file
14
tests/ui/lexer/emoji-literal-prefix.stderr
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
error: identifiers cannot contain emoji: `🐛`
|
||||||
|
--> $DIR/emoji-literal-prefix.rs:3:8
|
||||||
|
|
|
||||||
|
LL | lexes!(🐛#);
|
||||||
|
| ^^
|
||||||
|
LL | lexes!(🐛"foo");
|
||||||
|
| ^^
|
||||||
|
LL | lexes!(🐛'q');
|
||||||
|
| ^^
|
||||||
|
LL | lexes!(🐛'q);
|
||||||
|
| ^^
|
||||||
|
|
||||||
|
error: aborting due to 1 previous error
|
||||||
|
|
Loading…
Add table
Reference in a new issue