Remove TokenKind::InvalidPrefix.
It was added in #123752 to handle some cases involving emoji, but it isn't necessary because it's always treated the same as `TokenKind::InvalidIdent`. This commit removes it, which makes things a little simpler.
This commit is contained in:
parent
2c7c3697db
commit
e9a0c3c98c
4 changed files with 14 additions and 21 deletions
|
@ -99,10 +99,6 @@ pub enum TokenKind {
|
||||||
/// several tokens: `'r` and `#` and `foo`.
|
/// several tokens: `'r` and `#` and `foo`.
|
||||||
RawLifetime,
|
RawLifetime,
|
||||||
|
|
||||||
/// Similar to the above, but *always* an error on every edition. This is used
|
|
||||||
/// for emoji identifier recovery, as those are not meant to be ever accepted.
|
|
||||||
InvalidPrefix,
|
|
||||||
|
|
||||||
/// Guarded string literal prefix: `#"` or `##`.
|
/// Guarded string literal prefix: `#"` or `##`.
|
||||||
///
|
///
|
||||||
/// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
|
/// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
|
||||||
|
@ -466,7 +462,7 @@ impl Cursor<'_> {
|
||||||
Literal { kind, suffix_start }
|
Literal { kind, suffix_start }
|
||||||
}
|
}
|
||||||
// Identifier starting with an emoji. Only lexed for graceful error recovery.
|
// Identifier starting with an emoji. Only lexed for graceful error recovery.
|
||||||
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident_or_prefix(),
|
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
|
||||||
_ => Unknown,
|
_ => Unknown,
|
||||||
};
|
};
|
||||||
let res = Token::new(token_kind, self.pos_within_token());
|
let res = Token::new(token_kind, self.pos_within_token());
|
||||||
|
@ -550,23 +546,22 @@ impl Cursor<'_> {
|
||||||
// we see a prefix here, it is definitely an unknown prefix.
|
// we see a prefix here, it is definitely an unknown prefix.
|
||||||
match self.first() {
|
match self.first() {
|
||||||
'#' | '"' | '\'' => UnknownPrefix,
|
'#' | '"' | '\'' => UnknownPrefix,
|
||||||
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident_or_prefix(),
|
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
|
||||||
_ => Ident,
|
_ => Ident,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn invalid_ident_or_prefix(&mut self) -> TokenKind {
|
fn invalid_ident(&mut self) -> TokenKind {
|
||||||
// Start is already eaten, eat the rest of identifier.
|
// Start is already eaten, eat the rest of identifier.
|
||||||
self.eat_while(|c| {
|
self.eat_while(|c| {
|
||||||
const ZERO_WIDTH_JOINER: char = '\u{200d}';
|
const ZERO_WIDTH_JOINER: char = '\u{200d}';
|
||||||
is_id_continue(c) || (!c.is_ascii() && c.is_emoji_char()) || c == ZERO_WIDTH_JOINER
|
is_id_continue(c) || (!c.is_ascii() && c.is_emoji_char()) || c == ZERO_WIDTH_JOINER
|
||||||
});
|
});
|
||||||
// Known prefixes must have been handled earlier. So if
|
// An invalid identifier followed by '#' or '"' or '\'' could be
|
||||||
// we see a prefix here, it is definitely an unknown prefix.
|
// interpreted as an invalid literal prefix. We don't bother doing that
|
||||||
match self.first() {
|
// because the treatment of invalid identifiers and invalid prefixes
|
||||||
'#' | '"' | '\'' => InvalidPrefix,
|
// would be the same.
|
||||||
_ => InvalidIdent,
|
InvalidIdent
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn c_or_byte_string(
|
fn c_or_byte_string(
|
||||||
|
|
|
@ -213,7 +213,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
|
||||||
let ident = Symbol::intern(lifetime_name);
|
let ident = Symbol::intern(lifetime_name);
|
||||||
token::Lifetime(ident, IdentIsRaw::No)
|
token::Lifetime(ident, IdentIsRaw::No)
|
||||||
}
|
}
|
||||||
rustc_lexer::TokenKind::InvalidIdent | rustc_lexer::TokenKind::InvalidPrefix
|
rustc_lexer::TokenKind::InvalidIdent
|
||||||
// Do not recover an identifier with emoji if the codepoint is a confusable
|
// Do not recover an identifier with emoji if the codepoint is a confusable
|
||||||
// with a recoverable substitution token, like `➖`.
|
// with a recoverable substitution token, like `➖`.
|
||||||
if !UNICODE_ARRAY.iter().any(|&(c, _, _)| {
|
if !UNICODE_ARRAY.iter().any(|&(c, _, _)| {
|
||||||
|
@ -359,8 +359,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
|
||||||
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
|
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
|
||||||
|
|
||||||
rustc_lexer::TokenKind::Unknown
|
rustc_lexer::TokenKind::Unknown
|
||||||
| rustc_lexer::TokenKind::InvalidIdent
|
| rustc_lexer::TokenKind::InvalidIdent => {
|
||||||
| rustc_lexer::TokenKind::InvalidPrefix => {
|
|
||||||
// Don't emit diagnostics for sequences of the same invalid token
|
// Don't emit diagnostics for sequences of the same invalid token
|
||||||
if swallow_next_invalid > 0 {
|
if swallow_next_invalid > 0 {
|
||||||
swallow_next_invalid -= 1;
|
swallow_next_invalid -= 1;
|
||||||
|
|
|
@ -861,10 +861,9 @@ impl<'src> Classifier<'src> {
|
||||||
},
|
},
|
||||||
Some(c) => c,
|
Some(c) => c,
|
||||||
},
|
},
|
||||||
TokenKind::RawIdent
|
TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
|
||||||
| TokenKind::UnknownPrefix
|
Class::Ident(self.new_span(before, text))
|
||||||
| TokenKind::InvalidPrefix
|
}
|
||||||
| TokenKind::InvalidIdent => Class::Ident(self.new_span(before, text)),
|
|
||||||
TokenKind::Lifetime { .. }
|
TokenKind::Lifetime { .. }
|
||||||
| TokenKind::RawLifetime
|
| TokenKind::RawLifetime
|
||||||
| TokenKind::UnknownPrefixLifetime => Class::Lifetime,
|
| TokenKind::UnknownPrefixLifetime => Class::Lifetime,
|
||||||
|
|
|
@ -183,7 +183,7 @@ impl<'a> Converter<'a> {
|
||||||
rustc_lexer::TokenKind::Ident => {
|
rustc_lexer::TokenKind::Ident => {
|
||||||
SyntaxKind::from_keyword(token_text, self.edition).unwrap_or(IDENT)
|
SyntaxKind::from_keyword(token_text, self.edition).unwrap_or(IDENT)
|
||||||
}
|
}
|
||||||
rustc_lexer::TokenKind::InvalidPrefix | rustc_lexer::TokenKind::InvalidIdent => {
|
rustc_lexer::TokenKind::InvalidIdent => {
|
||||||
err = "Ident contains invalid characters";
|
err = "Ident contains invalid characters";
|
||||||
IDENT
|
IDENT
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue