From cee88f7a3f1c5bc284d0c6a87fb1d5535ebb3af8 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 17 Apr 2024 09:37:00 +1000 Subject: [PATCH] Prepare for invisible delimiters. Current places where `Interpolated` is used are going to change to instead use invisible delimiters. This prepares for that. - It adds invisible delimiter cases to the `can_begin_*`/`may_be_*` methods and the `failed_to_match_macro` that are equivalent to the existing `Interpolated` cases. - It adds panics/asserts in some places where invisible delimiters should never occur. - In `Parser::parse_struct_fields` it excludes an ident + invisible delimiter from special consideration in an error message, because that's quite different to an ident + paren/brace/bracket. --- compiler/rustc_ast/src/token.rs | 38 +++++++++++++- compiler/rustc_expand/src/mbe/diagnostics.rs | 6 ++- compiler/rustc_parse/src/lexer/tokentrees.rs | 16 ++++-- compiler/rustc_parse/src/parser/expr.rs | 12 ++++- .../rustc_parse/src/parser/nonterminal.rs | 50 +++++++++++++++++-- 5 files changed, 108 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index 592b56cc08f..678f43e3511 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -598,10 +598,11 @@ impl Token { /// **NB**: Take care when modifying this function, since it will change /// the stable set of tokens that are allowed to match an expr nonterminal. pub fn can_begin_expr(&self) -> bool { + use Delimiter::*; match self.uninterpolate().kind { Ident(name, is_raw) => ident_can_begin_expr(name, self.span, is_raw), // value name or keyword - OpenDelim(..) | // tuple, array or block + OpenDelim(Parenthesis | Brace | Bracket) | // tuple, array or block Literal(..) | // literal Not | // operator not BinOp(Minus) | // unary minus @@ -612,7 +613,7 @@ impl Token { // DotDotDot is no longer supported, but we need some way to display the error DotDot | DotDotDot | DotDotEq | // range notation Lt | BinOp(Shl) | // associated path - PathSep | // global path + PathSep | // global path Lifetime(..) | // labeled loop Pound => true, // expression attributes Interpolated(ref nt) => @@ -622,6 +623,12 @@ impl Token { NtLiteral(..) | NtPath(..) ), + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar( + MetaVarKind::Block | + MetaVarKind::Expr { .. } | + MetaVarKind::Literal | + MetaVarKind::Path + ))) => true, _ => false, } } @@ -655,6 +662,14 @@ impl Token { | NtPath(..) | NtTy(..) ), + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar( + MetaVarKind::Expr { .. } | + MetaVarKind::Literal | + MetaVarKind::Meta | + MetaVarKind::Pat(_) | + MetaVarKind::Path | + MetaVarKind::Ty + ))) => true, _ => false, } } @@ -675,6 +690,10 @@ impl Token { Lt | BinOp(Shl) | // associated path PathSep => true, // global path Interpolated(ref nt) => matches!(&**nt, NtTy(..) | NtPath(..)), + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar( + MetaVarKind::Ty | + MetaVarKind::Path + ))) => true, // For anonymous structs or unions, which only appear in specific positions // (type of struct fields or union fields), we don't consider them as regular types _ => false, @@ -687,6 +706,9 @@ impl Token { OpenDelim(Delimiter::Brace) | Literal(..) | BinOp(Minus) => true, Ident(name, IdentIsRaw::No) if name.is_bool_lit() => true, Interpolated(ref nt) => matches!(&**nt, NtExpr(..) | NtBlock(..) | NtLiteral(..)), + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar( + MetaVarKind::Expr { .. } | MetaVarKind::Block | MetaVarKind::Literal, + ))) => true, _ => false, } } @@ -743,6 +765,13 @@ impl Token { }, _ => false, }, + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind))) => match mv_kind { + MetaVarKind::Literal => true, + MetaVarKind::Expr { can_begin_literal_maybe_minus, .. } => { + can_begin_literal_maybe_minus + } + _ => false, + }, _ => false, } } @@ -758,6 +787,11 @@ impl Token { }, _ => false, }, + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind))) => match mv_kind { + MetaVarKind::Literal => true, + MetaVarKind::Expr { can_begin_string_literal, .. } => can_begin_string_literal, + _ => false, + }, _ => false, } } diff --git a/compiler/rustc_expand/src/mbe/diagnostics.rs b/compiler/rustc_expand/src/mbe/diagnostics.rs index ffcce1e52f6..77b8d228922 100644 --- a/compiler/rustc_expand/src/mbe/diagnostics.rs +++ b/compiler/rustc_expand/src/mbe/diagnostics.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use rustc_ast::token::{self, Token, TokenKind}; +use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, DiagMessage}; use rustc_macros::Subdiagnostic; @@ -68,7 +68,9 @@ pub(super) fn failed_to_match_macro( if let MatcherLoc::Token { token: expected_token } = &remaining_matcher && (matches!(expected_token.kind, TokenKind::Interpolated(_)) - || matches!(token.kind, TokenKind::Interpolated(_))) + || matches!(token.kind, TokenKind::Interpolated(_)) + || matches!(expected_token.kind, TokenKind::OpenDelim(Delimiter::Invisible(_))) + || matches!(token.kind, TokenKind::OpenDelim(Delimiter::Invisible(_)))) { err.note("captured metavariables except for `:tt`, `:ident` and `:lifetime` cannot be compared to other tokens"); err.note("see for more information"); diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs index d35c9c7bae7..7b21ffacc84 100644 --- a/compiler/rustc_parse/src/lexer/tokentrees.rs +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -43,11 +43,19 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> { let mut buf = Vec::new(); loop { match self.token.kind { - token::OpenDelim(delim) => buf.push(match self.lex_token_tree_open_delim(delim) { - Ok(val) => val, - Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)), - }), + token::OpenDelim(delim) => { + // Invisible delimiters cannot occur here because `TokenTreesReader` parses + // code directly from strings, with no macro expansion involved. + debug_assert!(!matches!(delim, Delimiter::Invisible(_))); + buf.push(match self.lex_token_tree_open_delim(delim) { + Ok(val) => val, + Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)), + }) + } token::CloseDelim(delim) => { + // Invisible delimiters cannot occur here because `TokenTreesReader` parses + // code directly from strings, with no macro expansion involved. + debug_assert!(!matches!(delim, Delimiter::Invisible(_))); return ( open_spacing, TokenStream::new(buf), diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 0012db471ef..b110d891c84 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -3591,11 +3591,19 @@ impl<'a> Parser<'a> { && !self.token.is_reserved_ident() && self.look_ahead(1, |t| { AssocOp::from_token(t).is_some() - || matches!(t.kind, token::OpenDelim(_)) + || matches!( + t.kind, + token::OpenDelim( + Delimiter::Parenthesis + | Delimiter::Bracket + | Delimiter::Brace + ) + ) || *t == token::Dot }) { - // Looks like they tried to write a shorthand, complex expression. + // Looks like they tried to write a shorthand, complex expression, + // E.g.: `n + m`, `f(a)`, `a[i]`, `S { x: 3 }`, or `x.y`. e.span_suggestion_verbose( self.token.span.shrink_to_lo(), "try naming a field", diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index 43c3de90d9d..8fb6f85d0dd 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -3,7 +3,9 @@ use rustc_ast::ptr::P; use rustc_ast::token::Nonterminal::*; use rustc_ast::token::NtExprKind::*; use rustc_ast::token::NtPatKind::*; -use rustc_ast::token::{self, Delimiter, NonterminalKind, Token}; +use rustc_ast::token::{ + self, Delimiter, InvisibleOrigin, MetaVarKind, Nonterminal, NonterminalKind, Token, +}; use rustc_ast_pretty::pprust; use rustc_data_structures::sync::Lrc; use rustc_errors::PResult; @@ -22,7 +24,28 @@ impl<'a> Parser<'a> { #[inline] pub fn nonterminal_may_begin_with(kind: NonterminalKind, token: &Token) -> bool { /// Checks whether the non-terminal may contain a single (non-keyword) identifier. - fn may_be_ident(nt: &token::Nonterminal) -> bool { + fn may_be_ident(kind: MetaVarKind) -> bool { + match kind { + MetaVarKind::Stmt + | MetaVarKind::Pat(_) + | MetaVarKind::Expr { .. } + | MetaVarKind::Ty + | MetaVarKind::Literal // `true`, `false` + | MetaVarKind::Meta + | MetaVarKind::Path => true, + + MetaVarKind::Item + | MetaVarKind::Block + | MetaVarKind::Vis => false, + + MetaVarKind::Ident + | MetaVarKind::Lifetime + | MetaVarKind::TT => unreachable!(), + } + } + + /// Old variant of `may_be_ident`. Being phased out. + fn nt_may_be_ident(nt: &Nonterminal) -> bool { match nt { NtStmt(_) | NtPat(_) @@ -69,7 +92,8 @@ impl<'a> Parser<'a> { | token::Ident(..) | token::NtIdent(..) | token::NtLifetime(..) - | token::Interpolated(_) => true, + | token::Interpolated(_) + | token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(_))) => true, _ => token.can_begin_type(), }, NonterminalKind::Block => match &token.kind { @@ -79,11 +103,29 @@ impl<'a> Parser<'a> { NtBlock(_) | NtStmt(_) | NtExpr(_) | NtLiteral(_) => true, NtItem(_) | NtPat(_) | NtTy(_) | NtMeta(_) | NtPath(_) | NtVis(_) => false, }, + token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(k))) => match k { + MetaVarKind::Block + | MetaVarKind::Stmt + | MetaVarKind::Expr { .. } + | MetaVarKind::Literal => true, + MetaVarKind::Item + | MetaVarKind::Pat(_) + | MetaVarKind::Ty + | MetaVarKind::Meta + | MetaVarKind::Path + | MetaVarKind::Vis => false, + MetaVarKind::Lifetime | MetaVarKind::Ident | MetaVarKind::TT => { + unreachable!() + } + }, _ => false, }, NonterminalKind::Path | NonterminalKind::Meta => match &token.kind { token::PathSep | token::Ident(..) | token::NtIdent(..) => true, - token::Interpolated(nt) => may_be_ident(nt), + token::Interpolated(nt) => nt_may_be_ident(nt), + token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(kind))) => { + may_be_ident(*kind) + } _ => false, }, NonterminalKind::Pat(pat_kind) => token.can_begin_pattern(pat_kind),