Auto merge of #114273 - nnethercote:move-doc-comment-desugaring, r=petrochenkov

Move doc comment desugaring out of `TokenCursor`.

It's awkward that `TokenCursor` sometimes desugars doc comments on the fly, but usually doesn't.
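For reference, the desugaring in question is the standard doc-comment-to-attribute transformation. A minimal illustration in plain Rust (independent of any compiler internals; the leading space in the attribute string comes from the comment text itself):

```rust
/// Adds one.
fn sugared(x: i32) -> i32 { x + 1 }

// The item above is surface syntax for an outer `doc` attribute. Inner
// comments (`//! text`) desugar to the inner form `#![doc = r" text"]`.
#[doc = r" Adds one."]
fn desugared(x: i32) -> i32 { x + 1 }

fn main() {}
```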

r? `@petrochenkov`
bors 2023-08-01 21:27:48 +00:00
commit d12c6e947c
4 changed files with 120 additions and 104 deletions

--- a/compiler/rustc_ast/src/tokenstream.rs
+++ b/compiler/rustc_ast/src/tokenstream.rs
@@ -13,7 +13,7 @@
 //! and a borrowed `TokenStream` is sufficient to build an owned `TokenStream` without taking
 //! ownership of the original.
 
-use crate::ast::StmtKind;
+use crate::ast::{AttrStyle, StmtKind};
 use crate::ast_traits::{HasAttrs, HasSpan, HasTokens};
 use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind};
 use crate::AttrVec;
@@ -22,11 +22,11 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::{self, Lrc};
 use rustc_macros::HashStable_Generic;
 use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
-use rustc_span::{Span, DUMMY_SP};
+use rustc_span::{sym, Span, Symbol, DUMMY_SP};
 use smallvec::{smallvec, SmallVec};
 use std::borrow::Cow;
-use std::{fmt, iter, mem};
+use std::{cmp, fmt, iter, mem};
 
 /// When the main Rust parser encounters a syntax-extension invocation, it
 /// parses the arguments to the invocation as a token tree. This is a very
@@ -566,6 +566,92 @@ impl TokenStream {
     pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
         self.0.chunks(chunk_size)
     }
+
+    /// Desugar doc comments like `/// foo` in the stream into `#[doc =
+    /// r"foo"]`. Modifies the `TokenStream` via `Lrc::make_mut`, but as little
+    /// as possible.
+    pub fn desugar_doc_comments(&mut self) {
+        if let Some(desugared_stream) = desugar_inner(self.clone()) {
+            *self = desugared_stream;
+        }
+
+        // The return value is `None` if nothing in `stream` changed.
+        fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
+            let mut i = 0;
+            let mut modified = false;
+            while let Some(tt) = stream.0.get(i) {
+                match tt {
+                    &TokenTree::Token(
+                        Token { kind: token::DocComment(_, attr_style, data), span },
+                        _spacing,
+                    ) => {
+                        let desugared = desugared_tts(attr_style, data, span);
+                        let desugared_len = desugared.len();
+                        Lrc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
+                        modified = true;
+                        i += desugared_len;
+                    }
+                    &TokenTree::Token(..) => i += 1,
+                    &TokenTree::Delimited(sp, delim, ref delim_stream) => {
+                        if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
+                            let new_tt = TokenTree::Delimited(sp, delim, desugared_delim_stream);
+                            Lrc::make_mut(&mut stream.0)[i] = new_tt;
+                            modified = true;
+                        }
+                        i += 1;
+                    }
+                }
+            }
+            if modified { Some(stream) } else { None }
+        }
+
+        fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
+            // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
+            // required to wrap the text. E.g.
+            // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
+            // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
+            // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
+            let mut num_of_hashes = 0;
+            let mut count = 0;
+            for ch in data.as_str().chars() {
+                count = match ch {
+                    '"' => 1,
+                    '#' if count > 0 => count + 1,
+                    _ => 0,
+                };
+                num_of_hashes = cmp::max(num_of_hashes, count);
+            }
+
+            // `/// foo` becomes `doc = r"foo"`.
+            let delim_span = DelimSpan::from_single(span);
+            let body = TokenTree::Delimited(
+                delim_span,
+                Delimiter::Bracket,
+                [
+                    TokenTree::token_alone(token::Ident(sym::doc, false), span),
+                    TokenTree::token_alone(token::Eq, span),
+                    TokenTree::token_alone(
+                        TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
+                        span,
+                    ),
+                ]
+                .into_iter()
+                .collect::<TokenStream>(),
+            );
+
+            if attr_style == AttrStyle::Inner {
+                vec![
+                    TokenTree::token_alone(token::Pound, span),
+                    TokenTree::token_alone(token::Not, span),
+                    body,
+                ]
+            } else {
+                vec![TokenTree::token_alone(token::Pound, span), body]
+            }
+        }
+    }
 }
 
 /// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
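The hash-counting loop in `desugared_tts` is worth seeing in isolation. A standalone sketch (the `required_hashes` name is invented for illustration; the loop body and the expected values mirror the comment in the hunk above):

```rust
use std::cmp;

// Minimum number of `#`s needed to wrap `text` in a raw string literal:
// a `"` starts a potential closing delimiter, and each `#` directly after
// it extends the run; the longest run seen decides the hash count.
fn required_hashes(text: &str) -> usize {
    let mut num_of_hashes = 0;
    let mut count = 0;
    for ch in text.chars() {
        count = match ch {
            '"' => 1,
            '#' if count > 0 => count + 1,
            _ => 0,
        };
        num_of_hashes = cmp::max(num_of_hashes, count);
    }
    num_of_hashes
}

fn main() {
    assert_eq!(required_hashes("abc d"), 0);                // r"abc d"
    assert_eq!(required_hashes(r#"abc "d""#), 1);           // r#"abc "d""#
    assert_eq!(required_hashes(r####"abc "##d##""####), 3); // r###"abc "##d##""###
}
```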
@@ -628,15 +714,6 @@ impl TokenTreeCursor {
     pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
         self.stream.0.get(self.index + n)
     }
-
-    // Replace the previously obtained token tree with `tts`, and rewind to
-    // just before them.
-    pub fn replace_prev_and_rewind(&mut self, tts: Vec<TokenTree>) {
-        assert!(self.index > 0);
-        self.index -= 1;
-        let stream = Lrc::make_mut(&mut self.stream.0);
-        stream.splice(self.index..self.index + 1, tts);
-    }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
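Both the new `desugar_doc_comments` and the `replace_prev_and_rewind` method removed above lean on `Lrc::make_mut` (rustc's alias for `Arc::make_mut`/`Rc::make_mut`) for copy-on-write mutation of a shared buffer. A small standard-library sketch of those semantics:

```rust
use std::rc::Rc;

fn main() {
    let original = Rc::new(vec![1, 2, 3]);
    let mut edited = Rc::clone(&original); // both handles share one buffer

    // `make_mut` clones the Vec here only because `original` still points at
    // it; a uniquely-owned stream would be mutated in place, clone-free.
    Rc::make_mut(&mut edited).splice(1..2, [20, 21]);

    assert_eq!(*original, [1, 2, 3]); // the sharer is untouched
    assert_eq!(*edited, [1, 20, 21, 3]);
}
```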

--- a/compiler/rustc_expand/src/mbe/macro_rules.rs
+++ b/compiler/rustc_expand/src/mbe/macro_rules.rs

@@ -249,7 +249,7 @@ fn expand_macro<'cx>(
                 trace_macros_note(&mut cx.expansions, sp, msg);
             }
 
-            let p = Parser::new(sess, tts, false, None);
+            let p = Parser::new(sess, tts, None);
 
             if is_local {
                 cx.resolver.record_macro_rule_usage(node_id, i);
@@ -257,7 +257,7 @@ fn expand_macro<'cx>(
 
             // Let the context choose how to interpret the result.
             // Weird, but useful for X-macros.
-            return Box::new(ParserAnyMacro {
+            Box::new(ParserAnyMacro {
                 parser: p,
 
                 // Pass along the original expansion site and the name of the macro
@@ -269,18 +269,17 @@ fn expand_macro<'cx>(
                 is_trailing_mac: cx.current_expansion.is_trailing_mac,
                 arm_span,
                 is_local,
-            });
+            })
         }
         Err(CanRetry::No(_)) => {
             debug!("Will not retry matching as an error was emitted already");
-            return DummyResult::any(sp);
+            DummyResult::any(sp)
         }
         Err(CanRetry::Yes) => {
-            // Retry and emit a better error below.
+            // Retry and emit a better error.
+            diagnostics::failed_to_match_macro(cx, sp, def_span, name, arg, lhses)
         }
     }
-
-    diagnostics::failed_to_match_macro(cx, sp, def_span, name, arg, lhses)
 }
 
 pub(super) enum CanRetry {
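The `expand_macro` change above is purely structural: the two early `return`s disappear, `failed_to_match_macro` moves into the `Err(CanRetry::Yes)` arm, and the `match` becomes the function's tail expression. A simplified sketch of the resulting shape (types and values invented for illustration):

```rust
enum CanRetry { Yes, No }

// Every arm now yields a value; previously the first two arms used
// `return ...;` and the `Yes` arm fell through to a call after the match.
fn expand(result: Result<u32, CanRetry>) -> u32 {
    match result {
        Ok(value) => value,
        Err(CanRetry::No) => 0,        // stands in for `DummyResult::any(sp)`
        Err(CanRetry::Yes) => retry(), // stands in for `failed_to_match_macro(..)`
    }
}

fn retry() -> u32 { 42 }

fn main() {
    assert_eq!(expand(Err(CanRetry::Yes)), 42);
}
```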
@@ -447,7 +446,7 @@ pub fn compile_declarative_macro(
 
     let create_parser = || {
         let body = macro_def.body.tokens.clone();
-        Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS)
+        Parser::new(&sess.parse_sess, body, rustc_parse::MACRO_ARGUMENTS)
     };
 
     let parser = create_parser();
@@ -457,8 +456,8 @@ pub fn compile_declarative_macro(
     match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
         Success(m) => m,
         Failure(()) => {
-            // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it with another one
-            // that gives us the information we need.
+            // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it
+            // with another one that gives us the information we need.
             // For this we need to reclone the macro body as the previous parser consumed it.
             let retry_parser = create_parser();
@@ -1417,6 +1416,11 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
     }
 }
 
-pub(super) fn parser_from_cx(sess: &ParseSess, tts: TokenStream, recovery: Recovery) -> Parser<'_> {
-    Parser::new(sess, tts, true, rustc_parse::MACRO_ARGUMENTS).recovery(recovery)
+pub(super) fn parser_from_cx(
+    sess: &ParseSess,
+    mut tts: TokenStream,
+    recovery: Recovery,
+) -> Parser<'_> {
+    tts.desugar_doc_comments();
+    Parser::new(sess, tts, rustc_parse::MACRO_ARGUMENTS).recovery(recovery)
 }
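Desugaring the input stream eagerly in `parser_from_cx` preserves the behavior the lazy path existed for: declarative macros can still match doc comments as `doc` attributes. A self-contained check (hypothetical `count_attrs!` macro):

```rust
// Doc comments on macro input arrive as `#[doc = ...]` attributes, so a
// `$(#[$attr:meta])*` matcher sees them like any other attribute.
macro_rules! count_attrs {
    ($(#[$attr:meta])* $item:item) => {
        [$(stringify!($attr)),*].len()
    };
}

fn main() {
    let n = count_attrs! {
        /// A documented function.
        #[inline]
        fn noop() {}
    };
    assert_eq!(n, 2); // the doc comment counts as one attribute
}
```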

--- a/compiler/rustc_parse/src/lib.rs
+++ b/compiler/rustc_parse/src/lib.rs

@@ -205,7 +205,7 @@ pub fn stream_to_parser<'a>(
     stream: TokenStream,
     subparser_name: Option<&'static str>,
 ) -> Parser<'a> {
-    Parser::new(sess, stream, false, subparser_name)
+    Parser::new(sess, stream, subparser_name)
 }
 
 /// Runs the given subparser `f` on the tokens of the given `attr`'s item.
@@ -215,7 +215,7 @@ pub fn parse_in<'a, T>(
     name: &'static str,
     mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
 ) -> PResult<'a, T> {
-    let mut parser = Parser::new(sess, tts, false, Some(name));
+    let mut parser = Parser::new(sess, tts, Some(name));
     let result = f(&mut parser)?;
     if parser.token != token::Eof {
         parser.unexpected()?;

--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs

@@ -24,7 +24,7 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
-use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
+use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
 use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
 use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
@@ -38,7 +38,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use std::ops::Range;
-use std::{cmp, mem, slice};
+use std::{mem, slice};
 use thin_vec::ThinVec;
 use tracing::debug;
 
@@ -224,11 +224,6 @@ struct TokenCursor {
     // because it's the outermost token stream which never has delimiters.
     stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
 
-    // We need to desugar doc comments from `/// foo` form into `#[doc =
-    // r"foo"]` form when parsing declarative macro inputs in `parse_tt`,
-    // because some declarative macros look for `doc` attributes.
-    desugar_doc_comments: bool,
-
     // Counts the number of calls to `{,inlined_}next`.
     num_next_calls: usize,
 
@@ -265,29 +260,17 @@ impl TokenCursor {
     #[inline(always)]
     fn inlined_next(&mut self) -> (Token, Spacing) {
         loop {
-            // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
-            // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
-            // removed.
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
             if let Some(tree) = self.tree_cursor.next_ref() {
                 match tree {
                     &TokenTree::Token(ref token, spacing) => {
-                        match (self.desugar_doc_comments, token) {
-                            (
-                                true,
-                                &Token { kind: token::DocComment(_, attr_style, data), span },
-                            ) => {
-                                let desugared = self.desugar(attr_style, data, span);
-                                self.tree_cursor.replace_prev_and_rewind(desugared);
-                                // Continue to get the first token of the desugared doc comment.
-                            }
-                            _ => {
-                                debug_assert!(!matches!(
-                                    token.kind,
-                                    token::OpenDelim(_) | token::CloseDelim(_)
-                                ));
-                                return (token.clone(), spacing);
-                            }
-                        }
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
                     }
                     &TokenTree::Delimited(sp, delim, ref tts) => {
                         let trees = tts.clone().into_trees();
@@ -311,52 +294,6 @@
             }
         }
     }
-
-    // Desugar a doc comment into something like `#[doc = r"foo"]`.
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
-        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-        // required to wrap the text. E.g.
-        // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
-        // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
-        // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
-        let mut num_of_hashes = 0;
-        let mut count = 0;
-        for ch in data.as_str().chars() {
-            count = match ch {
-                '"' => 1,
-                '#' if count > 0 => count + 1,
-                _ => 0,
-            };
-            num_of_hashes = cmp::max(num_of_hashes, count);
-        }
-
-        // `/// foo` becomes `doc = r"foo"`.
-        let delim_span = DelimSpan::from_single(span);
-        let body = TokenTree::Delimited(
-            delim_span,
-            Delimiter::Bracket,
-            [
-                TokenTree::token_alone(token::Ident(sym::doc, false), span),
-                TokenTree::token_alone(token::Eq, span),
-                TokenTree::token_alone(
-                    TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                    span,
-                ),
-            ]
-            .into_iter()
-            .collect::<TokenStream>(),
-        );
-
-        if attr_style == AttrStyle::Inner {
-            vec![
-                TokenTree::token_alone(token::Pound, span),
-                TokenTree::token_alone(token::Not, span),
-                body,
-            ]
-        } else {
-            vec![TokenTree::token_alone(token::Pound, span), body]
-        }
-    }
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -451,8 +388,7 @@ pub(super) fn token_descr(token: &Token) -> String {
 impl<'a> Parser<'a> {
     pub fn new(
         sess: &'a ParseSess,
-        tokens: TokenStream,
-        desugar_doc_comments: bool,
+        stream: TokenStream,
         subparser_name: Option<&'static str>,
     ) -> Self {
         let mut parser = Parser {
@@ -464,10 +400,9 @@ impl<'a> Parser<'a> {
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor {
-                tree_cursor: tokens.into_trees(),
+                tree_cursor: stream.into_trees(),
                 stack: Vec::new(),
                 num_next_calls: 0,
-                desugar_doc_comments,
                 break_last_token: false,
             },
             unmatched_angle_bracket_count: 0,
@@ -1172,7 +1107,7 @@ impl<'a> Parser<'a> {
             }
             i += 1;
         }
-        return looker(&token);
+        looker(&token)
     }
 
     /// Returns whether any of the given keywords are `dist` tokens ahead of the current one.