Auto merge of #114273 - nnethercote:move-doc-comment-desugaring, r=petrochenkov

Move doc comment desugaring out of `TokenCursor`.

It's awkward that `TokenCursor` sometimes desugars doc comments on the fly, but usually doesn't.
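For reference, the desugaring in question is the standard doc-comment-to-attribute transformation. A minimal illustration in plain Rust (independent of any compiler internals; the leading space in the attribute string comes from the comment text itself):

```rust
/// Adds one.
fn sugared(x: i32) -> i32 { x + 1 }

// The item above is surface syntax for an outer `doc` attribute. Inner
// comments (`//! text`) desugar to the inner form `#![doc = r" text"]`.
#[doc = r" Adds one."]
fn desugared(x: i32) -> i32 { x + 1 }

fn main() {}
```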

r? `@petrochenkov`
bors 2023-08-01 21:27:48 +00:00
commit d12c6e947c
4 changed files with 120 additions and 104 deletions

--- a/compiler/rustc_ast/src/tokenstream.rs
+++ b/compiler/rustc_ast/src/tokenstream.rs
@@ -13,7 +13,7 @@
 //! and a borrowed `TokenStream` is sufficient to build an owned `TokenStream` without taking
 //! ownership of the original.
 
-use crate::ast::StmtKind;
+use crate::ast::{AttrStyle, StmtKind};
 use crate::ast_traits::{HasAttrs, HasSpan, HasTokens};
 use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind};
 use crate::AttrVec;
@@ -22,11 +22,11 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::{self, Lrc};
 use rustc_macros::HashStable_Generic;
 use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
-use rustc_span::{Span, DUMMY_SP};
+use rustc_span::{sym, Span, Symbol, DUMMY_SP};
 use smallvec::{smallvec, SmallVec};
 use std::borrow::Cow;
-use std::{fmt, iter, mem};
+use std::{cmp, fmt, iter, mem};
 
 /// When the main Rust parser encounters a syntax-extension invocation, it
 /// parses the arguments to the invocation as a token tree. This is a very
@@ -566,6 +566,92 @@ impl TokenStream {
     pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
         self.0.chunks(chunk_size)
     }
+
+    /// Desugar doc comments like `/// foo` in the stream into `#[doc =
+    /// r"foo"]`. Modifies the `TokenStream` via `Lrc::make_mut`, but as little
+    /// as possible.
+    pub fn desugar_doc_comments(&mut self) {
+        if let Some(desugared_stream) = desugar_inner(self.clone()) {
+            *self = desugared_stream;
+        }
+
+        // The return value is `None` if nothing in `stream` changed.
+        fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
+            let mut i = 0;
+            let mut modified = false;
+            while let Some(tt) = stream.0.get(i) {
+                match tt {
+                    &TokenTree::Token(
+                        Token { kind: token::DocComment(_, attr_style, data), span },
+                        _spacing,
+                    ) => {
+                        let desugared = desugared_tts(attr_style, data, span);
+                        let desugared_len = desugared.len();
+                        Lrc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
+                        modified = true;
+                        i += desugared_len;
+                    }
+                    &TokenTree::Token(..) => i += 1,
+                    &TokenTree::Delimited(sp, delim, ref delim_stream) => {
+                        if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
+                            let new_tt = TokenTree::Delimited(sp, delim, desugared_delim_stream);
+                            Lrc::make_mut(&mut stream.0)[i] = new_tt;
+                            modified = true;
+                        }
+                        i += 1;
+                    }
+                }
+            }
+            if modified { Some(stream) } else { None }
+        }
+
+        fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
+            // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
+            // required to wrap the text. E.g.
+            // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
+            // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
+            // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
+            let mut num_of_hashes = 0;
+            let mut count = 0;
+            for ch in data.as_str().chars() {
+                count = match ch {
+                    '"' => 1,
+                    '#' if count > 0 => count + 1,
+                    _ => 0,
+                };
+                num_of_hashes = cmp::max(num_of_hashes, count);
+            }
+
+            // `/// foo` becomes `doc = r"foo"`.
+            let delim_span = DelimSpan::from_single(span);
+            let body = TokenTree::Delimited(
+                delim_span,
+                Delimiter::Bracket,
+                [
+                    TokenTree::token_alone(token::Ident(sym::doc, false), span),
+                    TokenTree::token_alone(token::Eq, span),
+                    TokenTree::token_alone(
+                        TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
+                        span,
+                    ),
+                ]
+                .into_iter()
+                .collect::<TokenStream>(),
+            );
+
+            if attr_style == AttrStyle::Inner {
+                vec![
+                    TokenTree::token_alone(token::Pound, span),
+                    TokenTree::token_alone(token::Not, span),
+                    body,
+                ]
+            } else {
+                vec![TokenTree::token_alone(token::Pound, span), body]
+            }
+        }
+    }
 }
 
 /// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
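The hash-counting loop in `desugared_tts` is worth seeing in isolation. A standalone sketch (the `required_hashes` name is invented for illustration; the loop body and the expected values mirror the comment in the hunk above):

```rust
use std::cmp;

// Minimum number of `#`s needed to wrap `text` in a raw string literal:
// a `"` starts a potential closing delimiter, and each `#` directly after
// it extends the run; the longest run seen decides the hash count.
fn required_hashes(text: &str) -> usize {
    let mut num_of_hashes = 0;
    let mut count = 0;
    for ch in text.chars() {
        count = match ch {
            '"' => 1,
            '#' if count > 0 => count + 1,
            _ => 0,
        };
        num_of_hashes = cmp::max(num_of_hashes, count);
    }
    num_of_hashes
}

fn main() {
    assert_eq!(required_hashes("abc d"), 0);                // r"abc d"
    assert_eq!(required_hashes(r#"abc "d""#), 1);           // r#"abc "d""#
    assert_eq!(required_hashes(r####"abc "##d##""####), 3); // r###"abc "##d##""###
}
```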
@@ -628,15 +714,6 @@ impl TokenTreeCursor {
     pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
         self.stream.0.get(self.index + n)
     }
-
-    // Replace the previously obtained token tree with `tts`, and rewind to
-    // just before them.
-    pub fn replace_prev_and_rewind(&mut self, tts: Vec<TokenTree>) {
-        assert!(self.index > 0);
-        self.index -= 1;
-        let stream = Lrc::make_mut(&mut self.stream.0);
-        stream.splice(self.index..self.index + 1, tts);
-    }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
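Both the new `desugar_doc_comments` and the `replace_prev_and_rewind` method removed above lean on `Lrc::make_mut` (rustc's alias for `Arc::make_mut`/`Rc::make_mut`) for copy-on-write mutation of a shared buffer. A small standard-library sketch of those semantics:

```rust
use std::rc::Rc;

fn main() {
    let original = Rc::new(vec![1, 2, 3]);
    let mut edited = Rc::clone(&original); // both handles share one buffer

    // `make_mut` clones the Vec here only because `original` still points at
    // it; a uniquely-owned stream would be mutated in place, clone-free.
    Rc::make_mut(&mut edited).splice(1..2, [20, 21]);

    assert_eq!(*original, [1, 2, 3]); // the sharer is untouched
    assert_eq!(*edited, [1, 20, 21, 3]);
}
```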

--- a/compiler/rustc_expand/src/mbe/macro_rules.rs
+++ b/compiler/rustc_expand/src/mbe/macro_rules.rs

@@ -249,7 +249,7 @@ fn expand_macro<'cx>(
                 trace_macros_note(&mut cx.expansions, sp, msg);
             }
 
-            let p = Parser::new(sess, tts, false, None);
+            let p = Parser::new(sess, tts, None);
 
             if is_local {
                 cx.resolver.record_macro_rule_usage(node_id, i);
@@ -257,7 +257,7 @@ fn expand_macro<'cx>(
 
             // Let the context choose how to interpret the result.
             // Weird, but useful for X-macros.
-            return Box::new(ParserAnyMacro {
+            Box::new(ParserAnyMacro {
                 parser: p,
 
                 // Pass along the original expansion site and the name of the macro
@@ -269,18 +269,17 @@ fn expand_macro<'cx>(
                 is_trailing_mac: cx.current_expansion.is_trailing_mac,
                 arm_span,
                 is_local,
-            });
+            })
         }
         Err(CanRetry::No(_)) => {
             debug!("Will not retry matching as an error was emitted already");
-            return DummyResult::any(sp);
+            DummyResult::any(sp)
         }
         Err(CanRetry::Yes) => {
-            // Retry and emit a better error below.
+            // Retry and emit a better error.
+            diagnostics::failed_to_match_macro(cx, sp, def_span, name, arg, lhses)
         }
     }
-
-    diagnostics::failed_to_match_macro(cx, sp, def_span, name, arg, lhses)
 }
 
 pub(super) enum CanRetry {
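The `expand_macro` change above is purely structural: the two early `return`s disappear, `failed_to_match_macro` moves into the `Err(CanRetry::Yes)` arm, and the `match` becomes the function's tail expression. A simplified sketch of the resulting shape (types and values invented for illustration):

```rust
enum CanRetry { Yes, No }

// Every arm now yields a value; previously the first two arms used
// `return ...;` and the `Yes` arm fell through to a call after the match.
fn expand(result: Result<u32, CanRetry>) -> u32 {
    match result {
        Ok(value) => value,
        Err(CanRetry::No) => 0,        // stands in for `DummyResult::any(sp)`
        Err(CanRetry::Yes) => retry(), // stands in for `failed_to_match_macro(..)`
    }
}

fn retry() -> u32 { 42 }

fn main() {
    assert_eq!(expand(Err(CanRetry::Yes)), 42);
}
```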
@@ -447,7 +446,7 @@ pub fn compile_declarative_macro(
 
     let create_parser = || {
         let body = macro_def.body.tokens.clone();
-        Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS)
+        Parser::new(&sess.parse_sess, body, rustc_parse::MACRO_ARGUMENTS)
     };
 
     let parser = create_parser();
@@ -457,8 +456,8 @@ pub fn compile_declarative_macro(
     match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
         Success(m) => m,
         Failure(()) => {
-            // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it with another one
-            // that gives us the information we need.
+            // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it
+            // with another one that gives us the information we need.
             // For this we need to reclone the macro body as the previous parser consumed it.
             let retry_parser = create_parser();
@@ -1417,6 +1416,11 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
     }
 }
 
-pub(super) fn parser_from_cx(sess: &ParseSess, tts: TokenStream, recovery: Recovery) -> Parser<'_> {
-    Parser::new(sess, tts, true, rustc_parse::MACRO_ARGUMENTS).recovery(recovery)
+pub(super) fn parser_from_cx(
+    sess: &ParseSess,
+    mut tts: TokenStream,
+    recovery: Recovery,
+) -> Parser<'_> {
+    tts.desugar_doc_comments();
+    Parser::new(sess, tts, rustc_parse::MACRO_ARGUMENTS).recovery(recovery)
 }
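Desugaring the input stream eagerly in `parser_from_cx` preserves the behavior the lazy path existed for: declarative macros can still match doc comments as `doc` attributes. A self-contained check (hypothetical `count_attrs!` macro):

```rust
// Doc comments on macro input arrive as `#[doc = ...]` attributes, so a
// `$(#[$attr:meta])*` matcher sees them like any other attribute.
macro_rules! count_attrs {
    ($(#[$attr:meta])* $item:item) => {
        [$(stringify!($attr)),*].len()
    };
}

fn main() {
    let n = count_attrs! {
        /// A documented function.
        #[inline]
        fn noop() {}
    };
    assert_eq!(n, 2); // the doc comment counts as one attribute
}
```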

--- a/compiler/rustc_parse/src/lib.rs
+++ b/compiler/rustc_parse/src/lib.rs

@@ -205,7 +205,7 @@ pub fn stream_to_parser<'a>(
     stream: TokenStream,
     subparser_name: Option<&'static str>,
 ) -> Parser<'a> {
-    Parser::new(sess, stream, false, subparser_name)
+    Parser::new(sess, stream, subparser_name)
 }
 
 /// Runs the given subparser `f` on the tokens of the given `attr`'s item.
@@ -215,7 +215,7 @@ pub fn parse_in<'a, T>(
     name: &'static str,
     mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
 ) -> PResult<'a, T> {
-    let mut parser = Parser::new(sess, tts, false, Some(name));
+    let mut parser = Parser::new(sess, tts, Some(name));
     let result = f(&mut parser)?;
     if parser.token != token::Eof {
         parser.unexpected()?;

--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs

@@ -24,7 +24,7 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
-use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
+use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
 use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
 use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
@@ -38,7 +38,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use std::ops::Range;
-use std::{cmp, mem, slice};
+use std::{mem, slice};
 use thin_vec::ThinVec;
 use tracing::debug;
 
@@ -224,11 +224,6 @@ struct TokenCursor {
     // because it's the outermost token stream which never has delimiters.
     stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
 
-    // We need to desugar doc comments from `/// foo` form into `#[doc =
-    // r"foo"]` form when parsing declarative macro inputs in `parse_tt`,
-    // because some declarative macros look for `doc` attributes.
-    desugar_doc_comments: bool,
-
     // Counts the number of calls to `{,inlined_}next`.
     num_next_calls: usize,
 
@@ -265,29 +260,17 @@ impl TokenCursor {
     #[inline(always)]
     fn inlined_next(&mut self) -> (Token, Spacing) {
         loop {
-            // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
-            // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
-            // removed.
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
             if let Some(tree) = self.tree_cursor.next_ref() {
                 match tree {
                     &TokenTree::Token(ref token, spacing) => {
-                        match (self.desugar_doc_comments, token) {
-                            (
-                                true,
-                                &Token { kind: token::DocComment(_, attr_style, data), span },
-                            ) => {
-                                let desugared = self.desugar(attr_style, data, span);
-                                self.tree_cursor.replace_prev_and_rewind(desugared);
-                                // Continue to get the first token of the desugared doc comment.
-                            }
-                            _ => {
-                                debug_assert!(!matches!(
-                                    token.kind,
-                                    token::OpenDelim(_) | token::CloseDelim(_)
-                                ));
-                                return (token.clone(), spacing);
-                            }
-                        }
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
                     }
                     &TokenTree::Delimited(sp, delim, ref tts) => {
                         let trees = tts.clone().into_trees();
@@ -311,52 +294,6 @@
             }
         }
     }
-
-    // Desugar a doc comment into something like `#[doc = r"foo"]`.
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
-        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-        // required to wrap the text. E.g.
-        // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
-        // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
-        // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
-        let mut num_of_hashes = 0;
-        let mut count = 0;
-        for ch in data.as_str().chars() {
-            count = match ch {
-                '"' => 1,
-                '#' if count > 0 => count + 1,
-                _ => 0,
-            };
-            num_of_hashes = cmp::max(num_of_hashes, count);
-        }
-
-        // `/// foo` becomes `doc = r"foo"`.
-        let delim_span = DelimSpan::from_single(span);
-        let body = TokenTree::Delimited(
-            delim_span,
-            Delimiter::Bracket,
-            [
-                TokenTree::token_alone(token::Ident(sym::doc, false), span),
-                TokenTree::token_alone(token::Eq, span),
-                TokenTree::token_alone(
-                    TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                    span,
-                ),
-            ]
-            .into_iter()
-            .collect::<TokenStream>(),
-        );
-
-        if attr_style == AttrStyle::Inner {
-            vec![
-                TokenTree::token_alone(token::Pound, span),
-                TokenTree::token_alone(token::Not, span),
-                body,
-            ]
-        } else {
-            vec![TokenTree::token_alone(token::Pound, span), body]
-        }
-    }
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -451,8 +388,7 @@ pub(super) fn token_descr(token: &Token) -> String {
 impl<'a> Parser<'a> {
     pub fn new(
         sess: &'a ParseSess,
-        tokens: TokenStream,
-        desugar_doc_comments: bool,
+        stream: TokenStream,
         subparser_name: Option<&'static str>,
     ) -> Self {
         let mut parser = Parser {
@@ -464,10 +400,9 @@ impl<'a> Parser<'a> {
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor {
-                tree_cursor: tokens.into_trees(),
+                tree_cursor: stream.into_trees(),
                 stack: Vec::new(),
                 num_next_calls: 0,
-                desugar_doc_comments,
                 break_last_token: false,
             },
             unmatched_angle_bracket_count: 0,
@@ -1172,7 +1107,7 @@ impl<'a> Parser<'a> {
             }
             i += 1;
         }
-        return looker(&token);
+        looker(&token)
     }
 
     /// Returns whether any of the given keywords are `dist` tokens ahead of the current one.