Auto merge of #129346 - nnethercote:fix-double-handling-in-collect_tokens, r=petrochenkov

Fix double handling in `collect_tokens`

Double handling of AST nodes can occur in `collect_tokens`. This is when an inner call to `collect_tokens` produces an AST node, and then an outer call to `collect_tokens` produces the same AST node. This can happen in a few places, e.g. expression statements where the statement delegates `HasTokens` and `HasAttrs` to the expression. It will also happen more after #124141.

This PR fixes some double handling cases that cause problems, including #129166.

r? `@petrochenkov`
This commit is contained in:
bors 2024-09-08 05:35:23 +00:00
commit 6d05f12170
10 changed files with 200 additions and 178 deletions

View file

@ -4167,6 +4167,7 @@ dependencies = [
"rustc_errors",
"rustc_feature",
"rustc_fluent_macro",
"rustc_index",
"rustc_lexer",
"rustc_macros",
"rustc_session",

View file

@ -18,7 +18,7 @@ mod tests;
#[derive(Debug, Clone)]
pub struct IntervalSet<I> {
// Start, end
map: SmallVec<[(u32, u32); 4]>,
map: SmallVec<[(u32, u32); 2]>,
domain: usize,
_data: PhantomData<I>,
}

View file

@ -12,6 +12,7 @@ rustc_data_structures = { path = "../rustc_data_structures" }
rustc_errors = { path = "../rustc_errors" }
rustc_feature = { path = "../rustc_feature" }
rustc_fluent_macro = { path = "../rustc_fluent_macro" }
rustc_index = { path = "../rustc_index" }
rustc_lexer = { path = "../rustc_lexer" }
rustc_macros = { path = "../rustc_macros" }
rustc_session = { path = "../rustc_session" }

View file

@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::{iter, mem};
use rustc_ast::token::{Delimiter, Token, TokenKind};
@ -6,6 +7,7 @@ use rustc_ast::tokenstream::{
Spacing, ToAttrTokenStream,
};
use rustc_ast::{self as ast, AttrVec, Attribute, HasAttrs, HasTokens};
use rustc_data_structures::fx::FxHashSet;
use rustc_errors::PResult;
use rustc_session::parse::ParseSess;
use rustc_span::{sym, Span, DUMMY_SP};
@ -134,9 +136,8 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
node_replacements.array_windows()
{
assert!(
node_range.0.end <= next_node_range.0.start
|| node_range.0.end >= next_node_range.0.end,
"Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
node_range.0.end <= next_node_range.0.start,
"Node ranges should be disjoint: ({:?}, {:?}) ({:?}, {:?})",
node_range,
tokens,
next_node_range,
@ -144,20 +145,8 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
);
}
// Process the replace ranges, starting from the highest start
// position and working our way back. If have tokens like:
//
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
//
// Then we will generate replace ranges for both
// the `#[cfg(FALSE)] field: bool` and the entire
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
//
// By starting processing from the replace range with the greatest
// start position, we ensure that any (outer) replace range which
// encloses another (inner) replace range will fully overwrite the
// inner range's replacement.
for (node_range, target) in node_replacements.into_iter().rev() {
// Process the replace ranges.
for (node_range, target) in node_replacements.into_iter() {
assert!(
!node_range.0.is_empty(),
"Cannot replace an empty node range: {:?}",
@ -234,6 +223,8 @@ impl<'a> Parser<'a> {
force_collect: ForceCollect,
f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
) -> PResult<'a, R> {
let possible_capture_mode = self.capture_cfg;
// We must collect if anything could observe the collected tokens, i.e.
// if any of the following conditions hold.
// - We are force collecting tokens (because force collection requires
@ -244,9 +235,9 @@ impl<'a> Parser<'a> {
// - Our target supports custom inner attributes (custom
// inner attribute invocation might require token capturing).
|| R::SUPPORTS_CUSTOM_INNER_ATTRS
// - We are in `capture_cfg` mode (which requires tokens if
// - We are in "possible capture mode" (which requires tokens if
// the parsed node has `#[cfg]` or `#[cfg_attr]` attributes).
|| self.capture_cfg;
|| possible_capture_mode;
if !needs_collection {
return Ok(f(self, attrs.attrs)?.0);
}
@ -267,18 +258,48 @@ impl<'a> Parser<'a> {
res?
};
// When we're not in `capture_cfg` mode, then skip collecting and
// return early if either of the following conditions hold.
// - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
// - `Some(None)`: Our target supports tokens and has none.
// - `Some(Some(_))`: Our target already has tokens set (e.g. we've
// parsed something like `#[my_attr] $item`). The actual parsing code
// takes care of prepending any attributes to the nonterminal, so we
// don't need to modify the already captured tokens.
// parsed something like `#[my_attr] $item`).
let ret_can_hold_tokens = matches!(ret.tokens_mut(), Some(None));
// Ignore any attributes we've previously processed. This happens when
// an inner call to `collect_tokens` returns an AST node and then an
// outer call ends up with the same AST node without any additional
// wrapping layer.
let mut seen_indices = FxHashSet::default();
for (i, attr) in ret.attrs().iter().enumerate() {
let is_unseen = self.capture_state.seen_attrs.insert(attr.id);
if !is_unseen {
seen_indices.insert(i);
}
}
let ret_attrs: Cow<'_, [Attribute]> =
if seen_indices.is_empty() {
Cow::Borrowed(ret.attrs())
} else {
let ret_attrs =
ret.attrs()
.iter()
.enumerate()
.filter_map(|(i, attr)| {
if seen_indices.contains(&i) { None } else { Some(attr.clone()) }
})
.collect();
Cow::Owned(ret_attrs)
};
// When we're not in "definite capture mode", then skip collecting and
// return early if `ret` doesn't support tokens or already has some.
//
// Note that this check is independent of `force_collect`. There's no
// need to collect tokens when we don't support tokens or already have
// tokens.
if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
let definite_capture_mode = self.capture_cfg
&& matches!(self.capture_state.capturing, Capturing::Yes)
&& has_cfg_or_cfg_attr(&ret_attrs);
if !definite_capture_mode && !ret_can_hold_tokens {
return Ok(ret);
}
@ -297,12 +318,12 @@ impl<'a> Parser<'a> {
// outer and inner attributes. So this check is more precise than
// the earlier `needs_tokens` check, and we don't need to
// check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
|| needs_tokens(ret.attrs())
// - We are in `capture_cfg` mode and there are `#[cfg]` or
// `#[cfg_attr]` attributes. (During normal non-`capture_cfg`
// parsing, we don't need any special capturing for those
// attributes, because they're builtin.)
|| (self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()));
|| needs_tokens(&ret_attrs)
// - We are in "definite capture mode", which requires that there
// are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
// non-`capture_cfg` parsing, we don't need any special capturing
// for those attributes, because they're builtin.)
|| definite_capture_mode;
if !needs_collection {
return Ok(ret);
}
@ -336,7 +357,7 @@ impl<'a> Parser<'a> {
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
// which means the relevant tokens will be removed. (More details below.)
let mut inner_attr_parser_replacements = Vec::new();
for attr in ret.attrs() {
for attr in ret_attrs.iter() {
if attr.style == ast::AttrStyle::Inner {
if let Some(inner_attr_parser_range) =
self.capture_state.inner_attr_parser_ranges.remove(&attr.id)
@ -359,11 +380,10 @@ impl<'a> Parser<'a> {
// from `ParserRange` form to `NodeRange` form. We will perform the actual
// replacement only when we convert the `LazyAttrTokenStream` to an
// `AttrTokenStream`.
self.capture_state.parser_replacements
[parser_replacements_start..parser_replacements_end]
.iter()
.cloned()
.chain(inner_attr_parser_replacements.iter().cloned())
self.capture_state
.parser_replacements
.drain(parser_replacements_start..parser_replacements_end)
.chain(inner_attr_parser_replacements.into_iter())
.map(|(parser_range, data)| {
(NodeRange::new(parser_range, collect_pos.start_pos), data)
})
@ -399,20 +419,12 @@ impl<'a> Parser<'a> {
break_last_token: self.break_last_token,
node_replacements,
});
let mut tokens_used = false;
// If we support tokens and don't already have them, store the newly captured tokens.
if let Some(target_tokens @ None) = ret.tokens_mut() {
*target_tokens = Some(tokens.clone());
}
// If `capture_cfg` is set and we're inside a recursive call to
// `collect_tokens`, then we need to register a replace range if we
// have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
// cfg-expansion on the captured token stream.
if self.capture_cfg
&& matches!(self.capture_state.capturing, Capturing::Yes)
&& has_cfg_or_cfg_attr(ret.attrs())
{
// If in "definite capture mode" we need to register a replace range
// for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run
// eager cfg-expansion on the captured token stream.
if definite_capture_mode {
assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
// What is the status here when parsing the example code at the top of this method?
@ -429,7 +441,9 @@ impl<'a> Parser<'a> {
// cfg-expand this AST node.
let start_pos =
if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
let target =
AttrsTarget { attrs: ret_attrs.iter().cloned().collect(), tokens: tokens.clone() };
tokens_used = true;
self.capture_state
.parser_replacements
.push((ParserRange(start_pos..end_pos), Some(target)));
@ -438,7 +452,16 @@ impl<'a> Parser<'a> {
// the outermost call to this method.
self.capture_state.parser_replacements.clear();
self.capture_state.inner_attr_parser_ranges.clear();
self.capture_state.seen_attrs.clear();
}
// If we support tokens and don't already have them, store the newly captured tokens.
if let Some(target_tokens @ None) = ret.tokens_mut() {
tokens_used = true;
*target_tokens = Some(tokens);
}
assert!(tokens_used); // check we didn't create `tokens` unnecessarily
Ok(ret)
}
}
@ -510,9 +533,11 @@ fn make_attr_token_stream(
}
/// Tokens are needed if:
/// - any non-single-segment attributes (other than doc comments) are present; or
/// - any `cfg_attr` attributes are present;
/// - any single-segment, non-builtin attributes are present.
/// - any non-single-segment attributes (other than doc comments) are present,
/// e.g. `rustfmt::skip`; or
/// - any `cfg_attr` attributes are present; or
/// - any single-segment, non-builtin attributes are present, e.g. `derive`,
/// `test`, `global_allocator`.
fn needs_tokens(attrs: &[ast::Attribute]) -> bool {
attrs.iter().any(|attr| match attr.ident() {
None => !attr.is_doc_comment(),

View file

@ -35,6 +35,7 @@ use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult};
use rustc_index::interval::IntervalSet;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{Span, DUMMY_SP};
@ -183,7 +184,7 @@ pub struct Parser<'a> {
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
// it doesn't unintentionally get bigger.
#[cfg(target_pointer_width = "64")]
rustc_data_structures::static_assert_size!(Parser<'_>, 256);
rustc_data_structures::static_assert_size!(Parser<'_>, 288);
/// Stores span information about a closure.
#[derive(Clone, Debug)]
@ -238,7 +239,8 @@ impl NodeRange {
// is the position of the function's start token. This gives
// `NodeRange(10..15)`.
fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
assert!(parser_range.start >= start_pos && parser_range.end >= start_pos);
assert!(!parser_range.is_empty());
assert!(parser_range.start >= start_pos);
NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
}
}
@ -260,6 +262,9 @@ struct CaptureState {
capturing: Capturing,
parser_replacements: Vec<ParserReplacement>,
inner_attr_parser_ranges: FxHashMap<AttrId, ParserRange>,
// `IntervalSet` is good for perf because attrs are mostly added to this
// set in contiguous ranges.
seen_attrs: IntervalSet<AttrId>,
}
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
@ -457,6 +462,7 @@ impl<'a> Parser<'a> {
capturing: Capturing::No,
parser_replacements: Vec::new(),
inner_attr_parser_ranges: Default::default(),
seen_attrs: IntervalSet::new(u32::MAX as usize),
},
current_closure: None,
recovery: Recovery::Allowed,

View file

@ -1,7 +0,0 @@
//@ known-bug: rust-lang/rust#129166
fn main() {
#[cfg_eval]
#[cfg]
0
}

View file

@ -0,0 +1,11 @@
// This was triggering an assertion failure in `NodeRange::new`.
#![feature(cfg_eval)]
#![feature(stmt_expr_attributes)]
fn f() -> u32 {
#[cfg_eval] #[cfg(not(FALSE))] 0
//~^ ERROR removing an expression is not supported in this position
}
fn main() {}

View file

@ -0,0 +1,8 @@
error: removing an expression is not supported in this position
--> $DIR/invalid-node-range-issue-129166.rs:7:17
|
LL | #[cfg_eval] #[cfg(not(FALSE))] 0
| ^^^^^^^^^^^^^^^^^^
error: aborting due to 1 previous error

View file

@ -14,17 +14,15 @@ extern crate test_macros;
macro_rules! produce_it {
($expr:expr) => {
#[derive(Print)]
struct Foo {
val: [bool; {
let a = #[cfg_attr(not(FALSE), rustc_dummy(first))] $expr;
0
}]
}
struct Foo(
[bool; #[cfg_attr(not(FALSE), rustc_dummy(first))] $expr]
);
}
}
produce_it!(#[cfg_attr(not(FALSE), rustc_dummy(second))] {
#![cfg_attr(not(FALSE), allow(unused))]
#![cfg_attr(not(FALSE), rustc_dummy(third))]
#[cfg_attr(not(FALSE), rustc_dummy(fourth))]
30
});

View file

@ -1,21 +1,9 @@
PRINT-DERIVE INPUT (DISPLAY): struct Foo
{
val :
[bool;
{
let a = #[rustc_dummy(first)] #[rustc_dummy(second)]
{ #![allow(unused)] 30 }; 0
}]
}
PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): struct Foo
{
val :
[bool;
{
let a = #[rustc_dummy(first)] #[rustc_dummy(second)]
{ #! [allow(unused)] 30 }; 0
}]
}
PRINT-DERIVE INPUT (DISPLAY): struct
Foo([bool; #[rustc_dummy(first)] #[rustc_dummy(second)]
{ #![rustc_dummy(third)] #[rustc_dummy(fourth)] 30 }]);
PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): struct
Foo([bool; #[rustc_dummy(first)] #[rustc_dummy(second)]
{ #! [rustc_dummy(third)] #[rustc_dummy(fourth)] 30 }]);
PRINT-DERIVE INPUT (DEBUG): TokenStream [
Ident {
ident: "struct",
@ -26,155 +14,146 @@ PRINT-DERIVE INPUT (DEBUG): TokenStream [
span: $DIR/macro-rules-derive-cfg.rs:17:16: 17:19 (#3),
},
Group {
delimiter: Brace,
delimiter: Parenthesis,
stream: TokenStream [
Ident {
ident: "val",
span: $DIR/macro-rules-derive-cfg.rs:18:13: 18:16 (#3),
},
Punct {
ch: ':',
spacing: Alone,
span: $DIR/macro-rules-derive-cfg.rs:18:16: 18:17 (#3),
},
Group {
delimiter: Bracket,
stream: TokenStream [
Ident {
ident: "bool",
span: $DIR/macro-rules-derive-cfg.rs:18:19: 18:23 (#3),
span: $DIR/macro-rules-derive-cfg.rs:18:14: 18:18 (#3),
},
Punct {
ch: ';',
spacing: Alone,
span: $DIR/macro-rules-derive-cfg.rs:18:23: 18:24 (#3),
span: $DIR/macro-rules-derive-cfg.rs:18:18: 18:19 (#3),
},
Punct {
ch: '#',
spacing: Alone,
span: $DIR/macro-rules-derive-cfg.rs:18:20: 18:21 (#3),
},
Group {
delimiter: Bracket,
stream: TokenStream [
Ident {
ident: "rustc_dummy",
span: $DIR/macro-rules-derive-cfg.rs:18:43: 18:54 (#3),
},
Group {
delimiter: Parenthesis,
stream: TokenStream [
Ident {
ident: "first",
span: $DIR/macro-rules-derive-cfg.rs:18:55: 18:60 (#3),
},
],
span: $DIR/macro-rules-derive-cfg.rs:18:54: 18:61 (#3),
},
],
span: $DIR/macro-rules-derive-cfg.rs:18:21: 18:63 (#3),
},
Punct {
ch: '#',
spacing: Alone,
span: $DIR/macro-rules-derive-cfg.rs:23:13: 23:14 (#0),
},
Group {
delimiter: Bracket,
stream: TokenStream [
Ident {
ident: "rustc_dummy",
span: $DIR/macro-rules-derive-cfg.rs:23:36: 23:47 (#0),
},
Group {
delimiter: Parenthesis,
stream: TokenStream [
Ident {
ident: "second",
span: $DIR/macro-rules-derive-cfg.rs:23:48: 23:54 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:23:47: 23:55 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:23:14: 23:57 (#0),
},
Group {
delimiter: Brace,
stream: TokenStream [
Ident {
ident: "let",
span: $DIR/macro-rules-derive-cfg.rs:19:17: 19:20 (#3),
},
Ident {
ident: "a",
span: $DIR/macro-rules-derive-cfg.rs:19:21: 19:22 (#3),
},
Punct {
ch: '=',
spacing: Alone,
span: $DIR/macro-rules-derive-cfg.rs:19:23: 19:24 (#3),
},
Punct {
ch: '#',
spacing: Joint,
span: $DIR/macro-rules-derive-cfg.rs:24:5: 24:6 (#0),
},
Punct {
ch: '!',
spacing: Alone,
span: $DIR/macro-rules-derive-cfg.rs:19:25: 19:26 (#3),
span: $DIR/macro-rules-derive-cfg.rs:24:6: 24:7 (#0),
},
Group {
delimiter: Bracket,
stream: TokenStream [
Ident {
ident: "rustc_dummy",
span: $DIR/macro-rules-derive-cfg.rs:19:48: 19:59 (#3),
span: $DIR/macro-rules-derive-cfg.rs:24:29: 24:40 (#0),
},
Group {
delimiter: Parenthesis,
stream: TokenStream [
Ident {
ident: "first",
span: $DIR/macro-rules-derive-cfg.rs:19:60: 19:65 (#3),
ident: "third",
span: $DIR/macro-rules-derive-cfg.rs:24:41: 24:46 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:19:59: 19:66 (#3),
span: $DIR/macro-rules-derive-cfg.rs:24:40: 24:47 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:19:26: 19:68 (#3),
span: $DIR/macro-rules-derive-cfg.rs:24:7: 24:49 (#0),
},
Punct {
ch: '#',
spacing: Alone,
span: $DIR/macro-rules-derive-cfg.rs:26:13: 26:14 (#0),
span: $DIR/macro-rules-derive-cfg.rs:25:5: 25:6 (#0),
},
Group {
delimiter: Bracket,
stream: TokenStream [
Ident {
ident: "rustc_dummy",
span: $DIR/macro-rules-derive-cfg.rs:26:36: 26:47 (#0),
span: $DIR/macro-rules-derive-cfg.rs:25:28: 25:39 (#0),
},
Group {
delimiter: Parenthesis,
stream: TokenStream [
Ident {
ident: "second",
span: $DIR/macro-rules-derive-cfg.rs:26:48: 26:54 (#0),
ident: "fourth",
span: $DIR/macro-rules-derive-cfg.rs:25:40: 25:46 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:26:47: 26:55 (#0),
span: $DIR/macro-rules-derive-cfg.rs:25:39: 25:47 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:26:14: 26:57 (#0),
},
Group {
delimiter: Brace,
stream: TokenStream [
Punct {
ch: '#',
spacing: Joint,
span: $DIR/macro-rules-derive-cfg.rs:27:5: 27:6 (#0),
},
Punct {
ch: '!',
spacing: Alone,
span: $DIR/macro-rules-derive-cfg.rs:27:6: 27:7 (#0),
},
Group {
delimiter: Bracket,
stream: TokenStream [
Ident {
ident: "allow",
span: $DIR/macro-rules-derive-cfg.rs:27:29: 27:34 (#0),
},
Group {
delimiter: Parenthesis,
stream: TokenStream [
Ident {
ident: "unused",
span: $DIR/macro-rules-derive-cfg.rs:27:35: 27:41 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:27:34: 27:42 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:27:7: 27:44 (#0),
},
Literal {
kind: Integer,
symbol: "30",
suffix: None,
span: $DIR/macro-rules-derive-cfg.rs:28:5: 28:7 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:26:58: 29:2 (#0),
},
Punct {
ch: ';',
spacing: Alone,
span: $DIR/macro-rules-derive-cfg.rs:19:74: 19:75 (#3),
span: $DIR/macro-rules-derive-cfg.rs:25:6: 25:49 (#0),
},
Literal {
kind: Integer,
symbol: "0",
symbol: "30",
suffix: None,
span: $DIR/macro-rules-derive-cfg.rs:20:17: 20:18 (#3),
span: $DIR/macro-rules-derive-cfg.rs:26:5: 26:7 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:18:25: 21:14 (#3),
span: $DIR/macro-rules-derive-cfg.rs:23:58: 27:2 (#0),
},
],
span: $DIR/macro-rules-derive-cfg.rs:18:18: 21:15 (#3),
span: $DIR/macro-rules-derive-cfg.rs:18:13: 18:70 (#3),
},
],
span: $DIR/macro-rules-derive-cfg.rs:17:20: 22:10 (#3),
span: $DIR/macro-rules-derive-cfg.rs:17:19: 19:10 (#3),
},
Punct {
ch: ';',
spacing: Alone,
span: $DIR/macro-rules-derive-cfg.rs:19:10: 19:11 (#3),
},
]