syntax: Capture a TokenStream when parsing items

This is then later used by `proc_macro` to generate a new `proc_macro::TokenTree` which preserves span information. Unfortunately this isn't a bullet-proof approach, as it doesn't handle the case where there are still other attributes on the item, especially inner attributes. Despite this, the intention here is to solve the primary use case for procedural attributes: those attached to functions as outer attributes, likely bare. In this situation we should now be able to yield a lossless stream of tokens to preserve span information.
2017-07-12 09:50:05 -07:00 · 2017-07-12 09:50:05 -07:00 · 4886ec8665
commit 4886ec8665
parent 036300aadd
10 changed files with 399 additions and 21 deletions
--- a/src/libproc_macro/lib.rs
+++ b/src/libproc_macro/lib.rs
@ -510,15 +510,38 @@ impl TokenTree {
            Literal(..) | DocComment(..) => TokenNode::Literal(self::Literal(token)),

            Interpolated(ref nt) => {
-                let mut node = None;
-                if let Nonterminal::NtItem(ref item) = nt.0 {
-                    if let Some(ref tokens) = item.tokens {
-                        node = Some(TokenNode::Group(Delimiter::None,
-                                                     TokenStream(tokens.clone())));
+                // An `Interpolated` token means that we have a `Nonterminal`
+                // which is often a parsed AST item. At this point we now need
+                // to convert the parsed AST to an actual token stream, e.g.
+                // un-parse it basically.
+                //
+                // Unfortunately there's not really a great way to do that in a
+                // guaranteed lossless fashion right now. The fallback here is
+                // to just stringify the AST node and reparse it, but this loses
+                // all span information.
+                //
+                // As a result, some AST nodes are annotated with the token
+                // stream they came from. Attempt to extract these lossless
+                // token streams before we fall back to the stringification.
+                let mut tokens = None;
+
+                match nt.0 {
+                    Nonterminal::NtItem(ref item) => {
+                        tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
                    }
+                    Nonterminal::NtTraitItem(ref item) => {
+                        tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
+                    }
+                    Nonterminal::NtImplItem(ref item) => {
+                        tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
+                    }
+                    _ => {}
                }

-                node.unwrap_or_else(|| {
+                tokens.map(|tokens| {
+                    TokenNode::Group(Delimiter::None,
+                                     TokenStream(tokens.clone()))
+                }).unwrap_or_else(|| {
                    __internal::with_sess(|(sess, _)| {
                        TokenNode::Group(Delimiter::None, TokenStream(nt.1.force(|| {
                            // FIXME(jseyfried): Avoid this pretty-print + reparse hack
@ -592,6 +615,34 @@ impl TokenTree {
    }
 }

+fn prepend_attrs(attrs: &[ast::Attribute],
+                 tokens: Option<&tokenstream::TokenStream>,
+                 span: syntax_pos::Span)
+    -> Option<tokenstream::TokenStream>
+{
+    let tokens = match tokens {
+        Some(tokens) => tokens,
+        None => return None,
+    };
+    if attrs.len() == 0 {
+        return Some(tokens.clone())
+    }
+    let mut builder = tokenstream::TokenStreamBuilder::new();
+    for attr in attrs {
+        assert_eq!(attr.style, ast::AttrStyle::Outer,
+                   "inner attributes should prevent cached tokens from existing");
+        let stream = __internal::with_sess(|(sess, _)| {
+            // FIXME: Avoid this pretty-print + reparse hack, as above
+            let name = "<macro expansion>".to_owned();
+            let source = pprust::attr_to_string(attr);
+            parse_stream_from_source_str(name, source, sess, Some(span))
+        });
+        builder.push(stream);
+    }
+    builder.push(tokens.clone());
+    Some(builder.build())
+}
+
 /// Permanently unstable internal implementation details of this crate. This
 /// should not be used.
 ///
--- a/src/libsyntax/ast.rs
+++ b/src/libsyntax/ast.rs
@ -1149,6 +1149,8 @@ pub struct TraitItem {
    pub attrs: Vec<Attribute>,
    pub node: TraitItemKind,
    pub span: Span,
+    /// See `Item::tokens` for what this is
+    pub tokens: Option<TokenStream>,
 }

 #[derive(Clone, PartialEq, Eq, RustcEncodable, RustcDecodable, Hash, Debug)]
@ -1168,6 +1170,8 @@ pub struct ImplItem {
    pub attrs: Vec<Attribute>,
    pub node: ImplItemKind,
    pub span: Span,
+    /// See `Item::tokens` for what this is
+    pub tokens: Option<TokenStream>,
 }

 #[derive(Clone, PartialEq, Eq, RustcEncodable, RustcDecodable, Hash, Debug)]
@ -1817,6 +1821,9 @@ pub struct Item {
    /// available for all items, although over time more and more items should
    /// have this be `Some`. Right now this is primarily used for procedural
    /// macros, notably custom attributes.
+    ///
+    /// Note that the tokens here do not include the outer attributes, but will
+    /// include inner attributes.
    pub tokens: Option<TokenStream>,
 }

--- a/src/libsyntax/ext/placeholders.rs
+++ b/src/libsyntax/ext/placeholders.rs
@ -51,11 +51,13 @@ pub fn placeholder(kind: ExpansionKind, id: ast::NodeId) -> Expansion {
        ExpansionKind::TraitItems => Expansion::TraitItems(SmallVector::one(ast::TraitItem {
            id: id, span: span, ident: ident, attrs: attrs,
            node: ast::TraitItemKind::Macro(mac_placeholder()),
+            tokens: None,
        })),
        ExpansionKind::ImplItems => Expansion::ImplItems(SmallVector::one(ast::ImplItem {
            id: id, span: span, ident: ident, vis: vis, attrs: attrs,
            node: ast::ImplItemKind::Macro(mac_placeholder()),
            defaultness: ast::Defaultness::Final,
+            tokens: None,
        })),
        ExpansionKind::Pat => Expansion::Pat(P(ast::Pat {
            id: id, span: span, node: ast::PatKind::Mac(mac_placeholder()),
--- a/src/libsyntax/fold.rs
+++ b/src/libsyntax/fold.rs
@ -957,7 +957,8 @@ pub fn noop_fold_trait_item<T: Folder>(i: TraitItem, folder: &mut T)
                TraitItemKind::Macro(folder.fold_mac(mac))
            }
        },
-        span: folder.new_span(i.span)
+        span: folder.new_span(i.span),
+        tokens: i.tokens,
    })
 }

@ -980,7 +981,8 @@ pub fn noop_fold_impl_item<T: Folder>(i: ImplItem, folder: &mut T)
            ast::ImplItemKind::Type(ty) => ast::ImplItemKind::Type(folder.fold_ty(ty)),
            ast::ImplItemKind::Macro(mac) => ast::ImplItemKind::Macro(folder.fold_mac(mac))
        },
-        span: folder.new_span(i.span)
+        span: folder.new_span(i.span),
+        tokens: i.tokens,
    })
 }

@ -1042,9 +1044,10 @@ pub fn noop_fold_item_simple<T: Folder>(Item {id, ident, attrs, node, vis, span,
        attrs: fold_attrs(attrs, folder),
        node: folder.fold_item_kind(node),
        span: folder.new_span(span),
-        tokens: tokens.map(|tokens| {
-            folder.fold_tts(tokens.into()).into()
-        }),
+
+        // FIXME: if this is replaced with a call to `folder.fold_tts` it causes
+        //        an ICE during resolve... odd!
+        tokens: tokens,
    }
 }

--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@ -843,11 +843,18 @@ mod tests {
    // check the contents of the tt manually:
    #[test] fn parse_fundecl () {
        // this test depends on the intern order of "fn" and "i32"
-        assert_eq!(string_to_item("fn a (b : i32) { b; }".to_string()),
+        let item = string_to_item("fn a (b : i32) { b; }".to_string()).map(|m| {
+            m.map(|mut m| {
+                m.tokens = None;
+                m
+            })
+        });
+        assert_eq!(item,
                  Some(
                      P(ast::Item{ident:Ident::from_str("a"),
                            attrs:Vec::new(),
                            id: ast::DUMMY_NODE_ID,
+                            tokens: None,
                            node: ast::ItemKind::Fn(P(ast::FnDecl {
                                inputs: vec![ast::Arg{
                                    ty: P(ast::Ty{id: ast::DUMMY_NODE_ID,
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@ -216,6 +216,30 @@ struct TokenCursorFrame {
    open_delim: bool,
    tree_cursor: tokenstream::Cursor,
    close_delim: bool,
+    last_token: LastToken,
+}
+
+/// This is used in `TokenCursorFrame` above to track tokens that are consumed
+/// by the parser, and then that's transitively used to record the tokens that
+/// each parsed AST item is created with.
+///
+/// Right now this has two states, either collecting tokens or not collecting
+/// tokens. If we're collecting tokens we just save everything off into a local
+/// `Vec`. Eventually, though, this should likely save tokens from the original
+/// token stream and just use slicing of token streams to avoid creation of a
+/// whole new vector.
+///
+/// The second state is where we're passively not recording tokens, but the last
+/// token is still tracked for when we want to start recording tokens. This
+/// "last token" means that when we start recording tokens we'll want to ensure
+/// that this, the first token, is included in the output.
+///
+/// You can find some more example usage of this in the `collect_tokens` method
+/// on the parser.
+#[derive(Clone)]
+enum LastToken {
+    Collecting(Vec<TokenTree>),
+    Was(Option<TokenTree>),
 }

 impl TokenCursorFrame {
@ -226,6 +250,7 @@ impl TokenCursorFrame {
            open_delim: delimited.delim == token::NoDelim,
            tree_cursor: delimited.stream().into_trees(),
            close_delim: delimited.delim == token::NoDelim,
+            last_token: LastToken::Was(None),
        }
    }
 }
@ -250,6 +275,11 @@ impl TokenCursor {
                return TokenAndSpan { tok: token::Eof, sp: syntax_pos::DUMMY_SP }
            };

+            match self.frame.last_token {
+                LastToken::Collecting(ref mut v) => v.push(tree.clone()),
+                LastToken::Was(ref mut t) => *t = Some(tree.clone()),
+            }
+
            match tree {
                TokenTree::Token(sp, tok) => return TokenAndSpan { tok: tok, sp: sp },
                TokenTree::Delimited(sp, ref delimited) => {
@ -1209,7 +1239,20 @@ impl<'a> Parser<'a> {
    /// Parse the items in a trait declaration
    pub fn parse_trait_item(&mut self, at_end: &mut bool) -> PResult<'a, TraitItem> {
        maybe_whole!(self, NtTraitItem, |x| x);
-        let mut attrs = self.parse_outer_attributes()?;
+        let attrs = self.parse_outer_attributes()?;
+        let (mut item, tokens) = self.collect_tokens(|this| {
+            this.parse_trait_item_(at_end, attrs)
+        })?;
+        // See `parse_item` for why this clause is here.
+        if !item.attrs.iter().any(|attr| attr.style == AttrStyle::Inner) {
+            item.tokens = Some(tokens);
+        }
+        Ok(item)
+    }
+
+    fn parse_trait_item_(&mut self,
+                         at_end: &mut bool,
+                         mut attrs: Vec<Attribute>) -> PResult<'a, TraitItem> {
        let lo = self.span;

        let (name, node) = if self.eat_keyword(keywords::Type) {
@ -1304,6 +1347,7 @@ impl<'a> Parser<'a> {
            attrs: attrs,
            node: node,
            span: lo.to(self.prev_span),
+            tokens: None,
        })
    }

@ -4653,7 +4697,7 @@ impl<'a> Parser<'a> {
            node: node,
            vis: vis,
            span: span,
-            tokens: None, // TODO: fill this in
+            tokens: None,
        })
    }

@ -4709,8 +4753,21 @@ impl<'a> Parser<'a> {
    /// Parse an impl item.
    pub fn parse_impl_item(&mut self, at_end: &mut bool) -> PResult<'a, ImplItem> {
        maybe_whole!(self, NtImplItem, |x| x);
+        let attrs = self.parse_outer_attributes()?;
+        let (mut item, tokens) = self.collect_tokens(|this| {
+            this.parse_impl_item_(at_end, attrs)
+        })?;

-        let mut attrs = self.parse_outer_attributes()?;
+        // See `parse_item` for why this clause is here.
+        if !item.attrs.iter().any(|attr| attr.style == AttrStyle::Inner) {
+            item.tokens = Some(tokens);
+        }
+        Ok(item)
+    }
+
+    fn parse_impl_item_(&mut self,
+                        at_end: &mut bool,
+                        mut attrs: Vec<Attribute>) -> PResult<'a, ImplItem> {
        let lo = self.span;
        let vis = self.parse_visibility(false)?;
        let defaultness = self.parse_defaultness()?;
@ -4742,7 +4799,8 @@ impl<'a> Parser<'a> {
            vis: vis,
            defaultness: defaultness,
            attrs: attrs,
-            node: node
+            node: node,
+            tokens: None,
        })
    }

@ -6018,9 +6076,71 @@ impl<'a> Parser<'a> {
        Ok(None)
    }

+    fn collect_tokens<F, R>(&mut self, f: F) -> PResult<'a, (R, TokenStream)>
+        where F: FnOnce(&mut Self) -> PResult<'a, R>
+    {
+        // Record every token pulled while `f` runs, seeded with the last token seen (if any).
+        let mut tokens = Vec::new();
+        match self.token_cursor.frame.last_token {
+            LastToken::Collecting(_) => {
+                panic!("cannot collect tokens recursively yet")
+            }
+            LastToken::Was(ref mut last) => tokens.extend(last.take()),
+        }
+        self.token_cursor.frame.last_token = LastToken::Collecting(tokens);
+        let prev = self.token_cursor.stack.len();
+        let ret = f(self);
+        let last_token = if self.token_cursor.stack.len() == prev {
+            &mut self.token_cursor.frame.last_token
+        } else {
+            &mut self.token_cursor.stack[prev].last_token
+        };
+        let mut tokens = match *last_token {
+            LastToken::Collecting(ref mut v) => mem::replace(v, Vec::new()),
+            LastToken::Was(_) => panic!("our vector went away?"),
+        };
+
+        // If we're not at EOF, our current token wasn't actually consumed by
+        // `f`, but it is still the final entry in the list we pulled out. In
+        // that case pop it off and store it back as the new "last token".
+        if self.token == token::Eof {
+            *last_token = LastToken::Was(None);
+        } else {
+            *last_token = LastToken::Was(tokens.pop());
+        }
+
+        Ok((ret?, tokens.into_iter().collect()))
+    }
+
    pub fn parse_item(&mut self) -> PResult<'a, Option<P<Item>>> {
        let attrs = self.parse_outer_attributes()?;
-        self.parse_item_(attrs, true, false)
+
+        let (ret, tokens) = self.collect_tokens(|this| {
+            this.parse_item_(attrs, true, false)
+        })?;
+
+        // Once we've parsed an item and recorded the tokens we got while
+        // parsing we may want to store `tokens` into the item we're about to
+        // return. Note, though, that we specifically didn't capture tokens
+        // related to outer attributes. The `tokens` field here may later be
+        // used with procedural macros to convert this item back into a token
+        // stream, but during expansion we may be removing attributes as we go
+        // along.
+        //
+        // If we've got inner attributes then the `tokens` we've got above holds
+        // these inner attributes. If an inner attribute is expanded we won't
+        // actually remove it from the token stream, so we'll just keep yielding
+        // it (bad!). To work around this case for now we just avoid recording
+        // `tokens` if we detect any inner attributes. This should help keep
+        // expansion correct, but we should fix this bug one day!
+        Ok(ret.map(|item| {
+            item.map(|mut i| {
+                if !i.attrs.iter().any(|attr| attr.style == AttrStyle::Inner) {
+                    i.tokens = Some(tokens);
+                }
+                i
+            })
+        }))
    }

    fn parse_path_list_items(&mut self) -> PResult<'a, Vec<ast::PathListItem>> {
--- a/src/libsyntax_ext/deriving/generic/mod.rs
+++ b/src/libsyntax_ext/deriving/generic/mod.rs
@ -504,6 +504,7 @@ impl<'a> TraitDef<'a> {
                defaultness: ast::Defaultness::Final,
                attrs: Vec::new(),
                node: ast::ImplItemKind::Type(type_def.to_ty(cx, self.span, type_ident, generics)),
+                tokens: None,
            }
        });

@ -930,6 +931,7 @@ impl<'a> MethodDef<'a> {
                                                decl: fn_decl,
                                            },
                                            body_block),
+            tokens: None,
        }
    }

--- a/src/test/compile-fail-fulldeps/proc-macro/attribute-with-error.rs
+++ b/src/test/compile-fail-fulldeps/proc-macro/attribute-with-error.rs
@ -14,12 +14,38 @@

 extern crate attribute_with_error;

-#[attribute_with_error::foo]
-fn test() {
+use attribute_with_error::foo;
+
+#[foo]
+fn test1() {
    let a: i32 = "foo";
    //~^ ERROR: mismatched types
 }

-fn main() {
-    test();
+fn test2() {
+    #![foo]
+
+    // FIXME: should have a type error here and assert it works but it doesn't
+}
+
+trait A {
+    // FIXME: should have a #[foo] attribute here and assert that it works
+    fn foo(&self) {
+        let a: i32 = "foo";
+        //~^ ERROR: mismatched types
+    }
+}
+
+struct B;
+
+impl A for B {
+    #[foo]
+    fn foo(&self) {
+        let a: i32 = "foo";
+        //~^ ERROR: mismatched types
+    }
+}
+
+#[foo]
+fn main() {
 }
--- a/src/test/compile-fail-fulldeps/proc-macro/attributes-included.rs
+++ b/src/test/compile-fail-fulldeps/proc-macro/attributes-included.rs
@ -0,0 +1,30 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// aux-build:attributes-included.rs
+
+#![feature(proc_macro, rustc_attrs)]
+
+extern crate attributes_included;
+
+#[attributes_included::bar]
+#[inline]
+/// doc
+#[attributes_included::foo]
+#[inline]
+/// doc
+fn foo() {
+    let a: i32 = "foo"; //~ WARN: unused variable
+}
+
+#[rustc_error]
+fn main() { //~ ERROR: compilation successful
+    foo()
+}
--- a/src/test/compile-fail-fulldeps/proc-macro/auxiliary/attributes-included.rs
+++ b/src/test/compile-fail-fulldeps/proc-macro/auxiliary/attributes-included.rs
@ -0,0 +1,130 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// force-host
+// no-prefer-dynamic
+
+#![feature(proc_macro)]
+#![crate_type = "proc-macro"]
+
+extern crate proc_macro;
+
+use proc_macro::{TokenStream, TokenTree, TokenNode, Delimiter, Literal};
+
+#[proc_macro_attribute]
+pub fn foo(attr: TokenStream, input: TokenStream) -> TokenStream {
+    assert!(attr.is_empty());
+    let input = input.into_iter().collect::<Vec<_>>();
+    {
+        let mut cursor = &input[..];
+        assert_inline(&mut cursor);
+        assert_doc(&mut cursor);
+        assert_inline(&mut cursor);
+        assert_doc(&mut cursor);
+        assert_foo(&mut cursor);
+        assert!(cursor.is_empty());
+    }
+    fold_stream(input.into_iter().collect())
+}
+
+#[proc_macro_attribute]
+pub fn bar(attr: TokenStream, input: TokenStream) -> TokenStream {
+    assert!(attr.is_empty());
+    let input = input.into_iter().collect::<Vec<_>>();
+    {
+        let mut cursor = &input[..];
+        assert_inline(&mut cursor);
+        assert_doc(&mut cursor);
+        assert_invoc(&mut cursor);
+        assert_inline(&mut cursor);
+        assert_doc(&mut cursor);
+        assert_foo(&mut cursor);
+        assert!(cursor.is_empty());
+    }
+    input.into_iter().collect()
+}
+
+fn assert_inline(slice: &mut &[TokenTree]) {
+    match slice[0].kind {
+        TokenNode::Op('#', _) => {}
+        _ => panic!("expected '#' char"),
+    }
+    match slice[1].kind {
+        TokenNode::Group(Delimiter::Bracket, _) => {}
+        _ => panic!("expected brackets"),
+    }
+    *slice = &slice[2..];
+}
+
+fn assert_doc(slice: &mut &[TokenTree]) {
+    match slice[0].kind {
+        TokenNode::Literal(_) => {}
+        _ => panic!("expected literal doc comment got other"),
+    }
+    *slice = &slice[1..];
+}
+
+fn assert_invoc(slice: &mut &[TokenTree]) {
+    match slice[0].kind {
+        TokenNode::Op('#', _) => {}
+        _ => panic!("expected '#' char"),
+    }
+    match slice[1].kind {
+        TokenNode::Group(Delimiter::Bracket, _) => {}
+        _ => panic!("expected brackets"),
+    }
+    *slice = &slice[2..];
+}
+
+fn assert_foo(slice: &mut &[TokenTree]) {
+    match slice[0].kind {
+        TokenNode::Term(ref name) => assert_eq!(name.as_str(), "fn"),
+        _ => panic!("expected fn"),
+    }
+    match slice[1].kind {
+        TokenNode::Term(ref name) => assert_eq!(name.as_str(), "foo"),
+        _ => panic!("expected foo"),
+    }
+    match slice[2].kind {
+        TokenNode::Group(Delimiter::Parenthesis, ref s) => assert!(s.is_empty()),
+        _ => panic!("expected parens"),
+    }
+    match slice[3].kind {
+        TokenNode::Group(Delimiter::Brace, _) => {}
+        _ => panic!("expected braces"),
+    }
+    *slice = &slice[4..];
+}
+
+fn fold_stream(input: TokenStream) -> TokenStream {
+    input.into_iter().map(fold_tree).collect()
+}
+
+fn fold_tree(input: TokenTree) -> TokenTree {
+    TokenTree {
+        span: input.span,
+        kind: fold_node(input.kind),
+    }
+}
+
+fn fold_node(input: TokenNode) -> TokenNode {
+    match input {
+        TokenNode::Group(a, b) => TokenNode::Group(a, fold_stream(b)),
+        TokenNode::Op(a, b) => TokenNode::Op(a, b),
+        TokenNode::Term(a) => TokenNode::Term(a),
+        TokenNode::Literal(a) => {
+            if a.to_string() != "\"foo\"" {
+                TokenNode::Literal(a)
+            } else {
+                TokenNode::Literal(Literal::integer(3))
+            }
+        }
+    }
+}