Auto merge of #114273 - nnethercote:move-doc-comment-desugaring, r=petrochenkov

Move doc comment desugaring out of `TokenCursor`. It's awkward that `TokenCursor` sometimes desugars doc comments on the fly, but usually doesn't. r? `@petrochenkov`
2023-08-01 21:27:48 +00:00 · 2023-08-01 21:27:48 +00:00 · d12c6e947c
commit d12c6e947c
parent abd3637e42 2e6ce68fba
4 changed files with 120 additions and 104 deletions
--- a/compiler/rustc_ast/src/tokenstream.rs
+++ b/compiler/rustc_ast/src/tokenstream.rs
@ -13,7 +13,7 @@
 //! and a borrowed `TokenStream` is sufficient to build an owned `TokenStream` without taking
 //! ownership of the original.

-use crate::ast::StmtKind;
+use crate::ast::{AttrStyle, StmtKind};
 use crate::ast_traits::{HasAttrs, HasSpan, HasTokens};
 use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind};
 use crate::AttrVec;
@ -22,11 +22,11 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::{self, Lrc};
 use rustc_macros::HashStable_Generic;
 use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
-use rustc_span::{Span, DUMMY_SP};
+use rustc_span::{sym, Span, Symbol, DUMMY_SP};
 use smallvec::{smallvec, SmallVec};

 use std::borrow::Cow;
-use std::{fmt, iter, mem};
+use std::{cmp, fmt, iter, mem};

 /// When the main Rust parser encounters a syntax-extension invocation, it
 /// parses the arguments to the invocation as a token tree. This is a very
@ -566,6 +566,92 @@ impl TokenStream {
    pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
        self.0.chunks(chunk_size)
    }
+
+    /// Desugar doc comments like `/// foo` in the stream into `#[doc = r"foo"]`.
+    /// Only streams that contain a doc comment (directly or nested) are modified
+    /// via `Lrc::make_mut`; all other streams are left untouched.
+    pub fn desugar_doc_comments(&mut self) {
+        if let Some(desugared_stream) = desugar_inner(self.clone()) {
+            *self = desugared_stream;
+        }
+
+        // The return value is `None` if nothing in `stream` changed.
+        fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
+            let mut i = 0;
+            let mut modified = false;
+            while let Some(tt) = stream.0.get(i) {
+                match tt {
+                    &TokenTree::Token(
+                        Token { kind: token::DocComment(_, attr_style, data), span },
+                        _spacing,
+                    ) => {
+                        let desugared = desugared_tts(attr_style, data, span);
+                        let desugared_len = desugared.len();
+                        Lrc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
+                        modified = true;
+                        i += desugared_len;
+                    }
+
+                    &TokenTree::Token(..) => i += 1,
+
+                    &TokenTree::Delimited(sp, delim, ref delim_stream) => {
+                        if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
+                            let new_tt = TokenTree::Delimited(sp, delim, desugared_delim_stream);
+                            Lrc::make_mut(&mut stream.0)[i] = new_tt;
+                            modified = true;
+                        }
+                        i += 1;
+                    }
+                }
+            }
+            if modified { Some(stream) } else { None }
+        }
+
+        fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
+            // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
+            // required to wrap the text. E.g.
+            // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
+            // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
+            // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
+            let mut num_of_hashes = 0;
+            let mut count = 0;
+            for ch in data.as_str().chars() {
+                count = match ch {
+                    '"' => 1,
+                    '#' if count > 0 => count + 1,
+                    _ => 0,
+                };
+                num_of_hashes = cmp::max(num_of_hashes, count);
+            }
+
+            // `/// foo` becomes `doc = r"foo"`.
+            let delim_span = DelimSpan::from_single(span);
+            let body = TokenTree::Delimited(
+                delim_span,
+                Delimiter::Bracket,
+                [
+                    TokenTree::token_alone(token::Ident(sym::doc, false), span),
+                    TokenTree::token_alone(token::Eq, span),
+                    TokenTree::token_alone(
+                        TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
+                        span,
+                    ),
+                ]
+                .into_iter()
+                .collect::<TokenStream>(),
+            );
+
+            if attr_style == AttrStyle::Inner {
+                vec![
+                    TokenTree::token_alone(token::Pound, span),
+                    TokenTree::token_alone(token::Not, span),
+                    body,
+                ]
+            } else {
+                vec![TokenTree::token_alone(token::Pound, span), body]
+            }
+        }
+    }
 }

 /// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
@ -628,15 +714,6 @@ impl TokenTreeCursor {
    pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
        self.stream.0.get(self.index + n)
    }
-
-    // Replace the previously obtained token tree with `tts`, and rewind to
-    // just before them.
-    pub fn replace_prev_and_rewind(&mut self, tts: Vec<TokenTree>) {
-        assert!(self.index > 0);
-        self.index -= 1;
-        let stream = Lrc::make_mut(&mut self.stream.0);
-        stream.splice(self.index..self.index + 1, tts);
-    }
 }

 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
--- a/compiler/rustc_expand/src/mbe/macro_rules.rs
+++ b/compiler/rustc_expand/src/mbe/macro_rules.rs
@ -249,7 +249,7 @@ fn expand_macro<'cx>(
                trace_macros_note(&mut cx.expansions, sp, msg);
            }

-            let p = Parser::new(sess, tts, false, None);
+            let p = Parser::new(sess, tts, None);

            if is_local {
                cx.resolver.record_macro_rule_usage(node_id, i);
@ -257,7 +257,7 @@ fn expand_macro<'cx>(

            // Let the context choose how to interpret the result.
            // Weird, but useful for X-macros.
-            return Box::new(ParserAnyMacro {
+            Box::new(ParserAnyMacro {
                parser: p,

                // Pass along the original expansion site and the name of the macro
@ -269,18 +269,17 @@ fn expand_macro<'cx>(
                is_trailing_mac: cx.current_expansion.is_trailing_mac,
                arm_span,
                is_local,
-            });
+            })
        }
        Err(CanRetry::No(_)) => {
            debug!("Will not retry matching as an error was emitted already");
-            return DummyResult::any(sp);
+            DummyResult::any(sp)
        }
        Err(CanRetry::Yes) => {
-            // Retry and emit a better error below.
+            // Retry and emit a better error.
+            diagnostics::failed_to_match_macro(cx, sp, def_span, name, arg, lhses)
        }
    }
-
-    diagnostics::failed_to_match_macro(cx, sp, def_span, name, arg, lhses)
 }

 pub(super) enum CanRetry {
@ -447,7 +446,7 @@ pub fn compile_declarative_macro(

    let create_parser = || {
        let body = macro_def.body.tokens.clone();
-        Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS)
+        Parser::new(&sess.parse_sess, body, rustc_parse::MACRO_ARGUMENTS)
    };

    let parser = create_parser();
@ -457,8 +456,8 @@ pub fn compile_declarative_macro(
        match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
            Success(m) => m,
            Failure(()) => {
-                // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it with another one
-                // that gives us the information we need.
+                // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it
+                // with another one that gives us the information we need.
                // For this we need to reclone the macro body as the previous parser consumed it.
                let retry_parser = create_parser();

@ -1417,6 +1416,11 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
    }
 }

-pub(super) fn parser_from_cx(sess: &ParseSess, tts: TokenStream, recovery: Recovery) -> Parser<'_> {
-    Parser::new(sess, tts, true, rustc_parse::MACRO_ARGUMENTS).recovery(recovery)
+pub(super) fn parser_from_cx(
+    sess: &ParseSess,
+    mut tts: TokenStream,
+    recovery: Recovery,
+) -> Parser<'_> {
+    tts.desugar_doc_comments();
+    Parser::new(sess, tts, rustc_parse::MACRO_ARGUMENTS).recovery(recovery)
 }
--- a/compiler/rustc_parse/src/lib.rs
+++ b/compiler/rustc_parse/src/lib.rs
@ -205,7 +205,7 @@ pub fn stream_to_parser<'a>(
    stream: TokenStream,
    subparser_name: Option<&'static str>,
 ) -> Parser<'a> {
-    Parser::new(sess, stream, false, subparser_name)
+    Parser::new(sess, stream, subparser_name)
 }

 /// Runs the given subparser `f` on the tokens of the given `attr`'s item.
@ -215,7 +215,7 @@ pub fn parse_in<'a, T>(
    name: &'static str,
    mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
 ) -> PResult<'a, T> {
-    let mut parser = Parser::new(sess, tts, false, Some(name));
+    let mut parser = Parser::new(sess, tts, Some(name));
    let result = f(&mut parser)?;
    if parser.token != token::Eof {
        parser.unexpected()?;
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@ -24,7 +24,7 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
-use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
+use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
 use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
 use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
@ -38,7 +38,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use std::ops::Range;
-use std::{cmp, mem, slice};
+use std::{mem, slice};
 use thin_vec::ThinVec;
 use tracing::debug;

@ -224,11 +224,6 @@ struct TokenCursor {
    // because it's the outermost token stream which never has delimiters.
    stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,

-    // We need to desugar doc comments from `/// foo` form into `#[doc =
-    // r"foo"]` form when parsing declarative macro inputs in `parse_tt`,
-    // because some declarative macros look for `doc` attributes.
-    desugar_doc_comments: bool,
-
    // Counts the number of calls to `{,inlined_}next`.
    num_next_calls: usize,

@ -265,29 +260,17 @@ impl TokenCursor {
    #[inline(always)]
    fn inlined_next(&mut self) -> (Token, Spacing) {
        loop {
-            // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
-            // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
-            // removed.
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
            if let Some(tree) = self.tree_cursor.next_ref() {
                match tree {
                    &TokenTree::Token(ref token, spacing) => {
-                        match (self.desugar_doc_comments, token) {
-                            (
-                                true,
-                                &Token { kind: token::DocComment(_, attr_style, data), span },
-                            ) => {
-                                let desugared = self.desugar(attr_style, data, span);
-                                self.tree_cursor.replace_prev_and_rewind(desugared);
-                                // Continue to get the first token of the desugared doc comment.
-                            }
-                            _ => {
-                                debug_assert!(!matches!(
-                                    token.kind,
-                                    token::OpenDelim(_) | token::CloseDelim(_)
-                                ));
-                                return (token.clone(), spacing);
-                            }
-                        }
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
                    }
                    &TokenTree::Delimited(sp, delim, ref tts) => {
                        let trees = tts.clone().into_trees();
@ -311,52 +294,6 @@ impl TokenCursor {
            }
        }
    }
-
-    // Desugar a doc comment into something like `#[doc = r"foo"]`.
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
-        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-        // required to wrap the text. E.g.
-        // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
-        // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
-        // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
-        let mut num_of_hashes = 0;
-        let mut count = 0;
-        for ch in data.as_str().chars() {
-            count = match ch {
-                '"' => 1,
-                '#' if count > 0 => count + 1,
-                _ => 0,
-            };
-            num_of_hashes = cmp::max(num_of_hashes, count);
-        }
-
-        // `/// foo` becomes `doc = r"foo"`.
-        let delim_span = DelimSpan::from_single(span);
-        let body = TokenTree::Delimited(
-            delim_span,
-            Delimiter::Bracket,
-            [
-                TokenTree::token_alone(token::Ident(sym::doc, false), span),
-                TokenTree::token_alone(token::Eq, span),
-                TokenTree::token_alone(
-                    TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                    span,
-                ),
-            ]
-            .into_iter()
-            .collect::<TokenStream>(),
-        );
-
-        if attr_style == AttrStyle::Inner {
-            vec![
-                TokenTree::token_alone(token::Pound, span),
-                TokenTree::token_alone(token::Not, span),
-                body,
-            ]
-        } else {
-            vec![TokenTree::token_alone(token::Pound, span), body]
-        }
-    }
 }

 #[derive(Debug, Clone, PartialEq)]
@ -451,8 +388,7 @@ pub(super) fn token_descr(token: &Token) -> String {
 impl<'a> Parser<'a> {
    pub fn new(
        sess: &'a ParseSess,
-        tokens: TokenStream,
-        desugar_doc_comments: bool,
+        stream: TokenStream,
        subparser_name: Option<&'static str>,
    ) -> Self {
        let mut parser = Parser {
@ -464,10 +400,9 @@ impl<'a> Parser<'a> {
            restrictions: Restrictions::empty(),
            expected_tokens: Vec::new(),
            token_cursor: TokenCursor {
-                tree_cursor: tokens.into_trees(),
+                tree_cursor: stream.into_trees(),
                stack: Vec::new(),
                num_next_calls: 0,
-                desugar_doc_comments,
                break_last_token: false,
            },
            unmatched_angle_bracket_count: 0,
@ -1172,7 +1107,7 @@ impl<'a> Parser<'a> {
            }
            i += 1;
        }
-        return looker(&token);
+        looker(&token)
    }

    /// Returns whether any of the given keywords are `dist` tokens ahead of the current one.