Start library
This commit is contained in:
parent
268cb2a04e
commit
e132280844
5 changed files with 113 additions and 190 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
/target/
|
||||
**/*.rs.bk
|
||||
Cargo.lock
|
6
Cargo.toml
Normal file
6
Cargo.toml
Normal file
|
@ -0,0 +1,6 @@
|
|||
[package]
|
||||
name = "libsyntax2"
|
||||
version = "0.1.0"
|
||||
authors = ["Aleksey Kladov <aleksey.kladov@gmail.com>"]
|
||||
|
||||
[dependencies]
|
152
minirust.rs
152
minirust.rs
|
@ -1,152 +0,0 @@
|
|||
/// Tag identifying the kind of a syntax node: a newtype over `u16` that is
/// copyable, comparable, and ordered. The known kinds are the `NodeKind`
/// constants declared further down in this file.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct NodeKind(u16);
|
||||
|
||||
/// A source text together with a flat vector of the nodes that describe it.
pub struct File {
|
||||
/// The text that node ranges slice into (see `Node::text`).
text: String,
|
||||
/// All nodes of the file; `root()` hands out the node at index 0.
nodes: Vec<NodeData>,
|
||||
}
|
||||
|
||||
/// Per-node record stored in `File::nodes`; links to other nodes are indexes
/// into that same vector.
struct NodeData {
|
||||
/// What kind of node this is.
kind: NodeKind,
|
||||
/// `(start, end)` offsets used by `Node::text` to slice `File::text`.
range: (u32, u32),
|
||||
/// Index of the parent node, or `None` when there is none.
parent: Option<u32>,
|
||||
/// Index of the first child, or `None` when the node has no children.
first_child: Option<u32>,
|
||||
/// Index of the next sibling, or `None` when this is the last one.
next_sibling: Option<u32>,
|
||||
}
|
||||
|
||||
/// Copyable handle to a single node: a borrowed `File` plus the node's index.
#[derive(Clone, Copy)]
|
||||
pub struct Node<'f> {
|
||||
/// The file that owns the node data and the text.
file: &'f File,
|
||||
/// Index of this node inside `file.nodes`.
idx: u32,
|
||||
}
|
||||
|
||||
/// Iterator over the children of a node, created by `Node::children`.
pub struct Children<'f> {
|
||||
/// The node that the next call to `next()` will yield, if any.
next: Option<Node<'f>>,
|
||||
}
|
||||
|
||||
impl File {
|
||||
/// Returns a handle to the node stored at index 0, which serves as the root.
///
/// # Panics
///
/// Panics if the file contains no nodes.
pub fn root<'f>(&'f self) -> Node<'f> {
|
||||
assert!(!self.nodes.is_empty());
|
||||
Node { file: self, idx: 0 }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'f> Node<'f> {
|
||||
/// Returns the kind recorded for this node.
pub fn kind(&self) -> NodeKind {
|
||||
self.data().kind
|
||||
}
|
||||
|
||||
/// Returns the slice of the file's text covered by this node's range.
///
/// # Panics
///
/// String slicing panics if the range is out of bounds or does not fall on
/// UTF-8 character boundaries.
pub fn text(&self) -> &'f str {
|
||||
let (start, end) = self.data().range;
|
||||
&self.file.text[start as usize..end as usize]
|
||||
}
|
||||
|
||||
/// Returns the parent node, or `None` if no parent index is recorded.
pub fn parent(&self) -> Option<Node<'f>> {
|
||||
self.as_node(self.data().parent)
|
||||
}
|
||||
|
||||
/// Returns an iterator that starts at this node's first child and follows
/// the `next_sibling` links.
pub fn children(&self) -> Children<'f> {
|
||||
Children { next: self.as_node(self.data().first_child) }
|
||||
}
|
||||
|
||||
/// Looks up this node's record in the file's node vector.
fn data(&self) -> &'f NodeData {
|
||||
&self.file.nodes[self.idx as usize]
|
||||
}
|
||||
|
||||
/// Turns an optional node index into an optional handle into the same file.
fn as_node(&self, idx: Option<u32>) -> Option<Node<'f>> {
|
||||
idx.map(|idx| Node { file: self.file, idx })
|
||||
}
|
||||
}
|
||||
|
||||
/// Walks a sibling chain: each step yields the stored node and then moves on
/// to that node's `next_sibling`.
impl<'f> Iterator for Children<'f> {
|
||||
type Item = Node<'f>;
|
||||
|
||||
/// Yields the pending node (if any) and advances to its next sibling.
fn next(&mut self) -> Option<Node<'f>> {
|
||||
// `Node` is `Copy`, so this reads the pending node without moving it out.
let next = self.next;
|
||||
// Once `next` is `None` it stays `None`, so the iterator keeps returning `None`.
self.next = next.and_then(|node| node.as_node(node.data().next_sibling));
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
// The known node kinds; each constant wraps a distinct `u16` value, 0 through 13.
// NOTE(review): judging by the names only, 0-9 look like token-level kinds and
// 10-13 like composite syntax nodes — confirm before relying on that split.
pub const ERROR: NodeKind = NodeKind(0);
|
||||
pub const WHITESPACE: NodeKind = NodeKind(1);
|
||||
pub const STRUCT_KW: NodeKind = NodeKind(2);
|
||||
pub const IDENT: NodeKind = NodeKind(3);
|
||||
pub const L_CURLY: NodeKind = NodeKind(4);
|
||||
pub const R_CURLY: NodeKind = NodeKind(5);
|
||||
pub const COLON: NodeKind = NodeKind(6);
|
||||
pub const COMMA: NodeKind = NodeKind(7);
|
||||
pub const AMP: NodeKind = NodeKind(8);
|
||||
pub const LINE_COMMENT: NodeKind = NodeKind(9);
|
||||
pub const FILE: NodeKind = NodeKind(10);
|
||||
pub const STRUCT_DEF: NodeKind = NodeKind(11);
|
||||
pub const FIELD_DEF: NodeKind = NodeKind(12);
|
||||
pub const TYPE_REF: NodeKind = NodeKind(13);
|
||||
|
||||
|
||||
/// A copyable, typed wrapper around a `Node`.
pub trait AstNode<'f>: Copy + 'f {
|
||||
/// Tries to wrap `node`, returning `None` when the implementor does not
/// accept it (the implementations in this file check the node's kind).
fn new(node: Node<'f>) -> Option<Self>;
|
||||
/// Returns the underlying untyped node.
fn node(&self) -> Node<'f>;
|
||||
}
|
||||
|
||||
/// Returns the first direct child of `node` whose kind equals `kind`, or
/// `None` if there is no such child.
pub fn child_of_kind<'f>(node: Node<'f>, kind: NodeKind) -> Option<Node<'f>> {
|
||||
node.children().find(|child| child.kind() == kind)
|
||||
}
|
||||
|
||||
/// Returns a boxed iterator over the direct children of `node` that `A::new`
/// accepts; children it rejects are skipped.
// NOTE(review): `Box<Iterator<..>>` is a bare trait object; newer Rust
// editions require it to be spelled `Box<dyn Iterator<..>>`.
pub fn ast_children<'f, A: AstNode<'f>>(node: Node<'f>) -> Box<Iterator<Item=A> + 'f> {
|
||||
Box::new(node.children().filter_map(A::new))
|
||||
}
|
||||
|
||||
/// Typed wrapper for a node of kind `STRUCT_DEF` (see its `AstNode` impl).
#[derive(Clone, Copy)]
|
||||
pub struct StructDef<'f>(Node<'f>);
|
||||
|
||||
/// Typed wrapper for a node of kind `FIELD_DEF` (see its `AstNode` impl).
#[derive(Clone, Copy)]
|
||||
pub struct FieldDef<'f>(Node<'f>);
|
||||
|
||||
/// Typed wrapper for a node of kind `TYPE_REF` (see its `AstNode` impl).
#[derive(Clone, Copy)]
|
||||
pub struct TypeRef<'f>(Node<'f>);
|
||||
|
||||
/// Mixin for AST nodes that carry a name in the form of an `IDENT` child.
pub trait NameOwner<'f>: AstNode<'f> {
|
||||
/// Returns the first direct child of kind `IDENT`.
///
/// # Panics
///
/// Panics if the node has no `IDENT` child.
fn name_ident(&self) -> Node<'f> {
|
||||
child_of_kind(self.node(), IDENT).unwrap()
|
||||
}
|
||||
|
||||
/// Returns the text of the name identifier; panics under the same
/// condition as `name_ident`.
fn name(&self) -> &'f str { self.name_ident().text() }
|
||||
}
|
||||
|
||||
|
||||
impl<'f> AstNode<'f> for StructDef<'f> {
|
||||
/// Wraps `node` only if its kind is `STRUCT_DEF`; otherwise returns `None`.
fn new(node: Node<'f>) -> Option<Self> {
|
||||
if node.kind() == STRUCT_DEF { Some(StructDef(node)) } else { None }
|
||||
}
|
||||
/// Returns the wrapped node.
fn node(&self) -> Node<'f> { self.0 }
|
||||
}
|
||||
|
||||
impl<'f> AstNode<'f> for FieldDef<'f> {
|
||||
/// Wraps `node` only if its kind is `FIELD_DEF`; otherwise returns `None`.
fn new(node: Node<'f>) -> Option<Self> {
|
||||
if node.kind() == FIELD_DEF { Some(FieldDef(node)) } else { None }
|
||||
}
|
||||
/// Returns the wrapped node.
fn node(&self) -> Node<'f> { self.0 }
|
||||
}
|
||||
|
||||
impl<'f> AstNode<'f> for TypeRef<'f> {
|
||||
/// Wraps `node` only if its kind is `TYPE_REF`; otherwise returns `None`.
fn new(node: Node<'f>) -> Option<Self> {
|
||||
if node.kind() == TYPE_REF { Some(TypeRef(node)) } else { None }
|
||||
}
|
||||
/// Returns the wrapped node.
fn node(&self) -> Node<'f> { self.0 }
|
||||
}
|
||||
|
||||
// Struct and field definitions use the default `NameOwner` methods unchanged.
impl<'f> NameOwner<'f> for StructDef<'f> {}
|
||||
impl<'f> NameOwner<'f> for FieldDef<'f> {}
|
||||
|
||||
impl<'f> StructDef<'f> {
|
||||
/// Returns a boxed iterator over the direct children of this node that are
/// `FieldDef`s.
pub fn fields(&self) -> Box<Iterator<Item=FieldDef<'f>> + 'f> {
|
||||
ast_children(self.node())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'f> FieldDef<'f> {
|
||||
/// Returns the first direct child that is a `TypeRef`, or `None` if the
/// field has no such child.
pub fn type_ref(&self) -> Option<TypeRef<'f>> {
|
||||
ast_children(self.node()).next()
|
||||
}
|
||||
}
|
135
rfc.md
135
rfc.md
|
@ -30,12 +30,66 @@ other tools, and eventual libsyntax removal.
|
|||
|
||||
Note that this RFC does not propose to stabilize any API for working
|
||||
with rust syntax: the semver version of the hypothetical library would
|
||||
be `0.1.0`.
|
||||
be `0.1.0`. It is intended to be used by tools, which are currently
|
||||
closely related to the compiler: `rustc`, `rustfmt`, `clippy`, `rls`
|
||||
and hypothetical `rustfix`. While it would be possible to create
|
||||
third-party tools on top of the new libsyntax, the burden of adapting
|
||||
to breaking changes would be on authors of such tools.
|
||||
|
||||
|
||||
# Motivation
|
||||
[motivation]: #motivation
|
||||
|
||||
There are two main drawbacks with the current version of libsyntax:
|
||||
|
||||
* It is tightly integrated with the compiler and hard to use
|
||||
independently
|
||||
|
||||
* The AST representation is not well-suited for use inside IDEs
|
||||
|
||||
|
||||
## IDE support
|
||||
|
||||
There are several differences in how IDEs and compilers typically
|
||||
treat source code.
|
||||
|
||||
In the compiler, it is convenient to transform the source
|
||||
code into Abstract Syntax Tree form, which is independent of the
|
||||
surface syntax. For example, it's convenient to discard comments,
|
||||
whitespaces and desugar some syntactic constructs in terms of the
|
||||
simpler ones.
|
||||
|
||||
In contrast, IDEs work much closer to the source code, so it is
|
||||
crucial to preserve full information about the original text. For
|
||||
example, an IDE may adjust indentation after typing a `}` which closes a
|
||||
block, and to do this correctly, the IDE must be aware of syntax (that is,
|
||||
that `}` indeed closes some block, and is not a syntax error) and of
|
||||
all whitespaces and comments. So, an IDE-suitable AST should explicitly
|
||||
account for syntactic elements, not considered important by the
|
||||
compiler.
|
||||
|
||||
Another difference is that IDEs typically work with incomplete and
|
||||
syntactically invalid code. This boils down to two parser properties.
|
||||
First, the parser must produce a syntax tree even if some required input
|
||||
is missing. For example, for input `fn foo` the function node should
|
||||
be present in the parse, despite the fact that there are no parameters
|
||||
or body. Second, the parser must be able to skip over parts of input
|
||||
it can't recognize and aggressively recover from errors. That is, the
|
||||
syntax tree data structure should be able to handle both missing and
|
||||
extra nodes.
|
||||
|
||||
IDEs also need the ability to incrementally reparse and relex source
|
||||
code after the user types. A smart IDE would use syntax tree structure
|
||||
to handle editing commands (for example, to add/remove trailing commas
|
||||
after join/split lines actions), so parsing time can be very
|
||||
noticeable.
|
||||
|
||||
|
||||
Currently rustc uses the classical AST approach, and preserves some of
|
||||
the source code information in the form of spans in the AST. It is not
|
||||
clear if this structure can fulfill all IDE requirements.
|
||||
|
||||
|
||||
## Reusability
|
||||
|
||||
In theory, the parser can be a pure function, which takes a `&str` as
|
||||
|
@ -67,29 +121,6 @@ files. As a data point, it turned out to be easier to move `rustfmt`
|
|||
into the main `rustc` repository than to move libsyntax outside!
|
||||
|
||||
|
||||
## IDE support
|
||||
|
||||
There is one big difference in how IDEs and compilers typically treat
|
||||
source code.
|
||||
|
||||
In the compiler, it is convenient to transform the source
|
||||
code into Abstract Syntax Tree form, which is independent of the
|
||||
surface syntax. For example, it's convenient to discard comments,
|
||||
whitespaces and desugar some syntactic constructs in terms of the
|
||||
simpler ones.
|
||||
|
||||
In contrast, for IDEs it is crucial to have a lossless view of the
|
||||
source code because, for example, it's important to preserve comments
|
||||
during refactorings. Ideally, IDEs should be able to incrementally
|
||||
relex and reparse the file as the user types, because syntax tree is
|
||||
necessary to correctly handle certain code-editing actions like
|
||||
autoindentation or joining lines. IDE also must be able to produce
|
||||
partial parse trees when some input is missing or invalid.
|
||||
|
||||
Currently rustc uses the AST approach, and preserves some of the
|
||||
source code information in the form of spans in the AST.
|
||||
|
||||
|
||||
# Guide-level explanation
|
||||
[guide-level-explanation]: #guide-level-explanation
|
||||
|
||||
|
@ -99,11 +130,33 @@ Not applicable.
|
|||
# Reference-level explanation
|
||||
[reference-level-explanation]: #reference-level-explanation
|
||||
|
||||
This section proposes a new syntax tree data structure, which should
|
||||
be suitable for both compiler and IDE. It is heavily inspired by [PSI]
|
||||
data structure which is used in [IntelliJ] based IDEs and in the [Kotlin]
|
||||
compiler.
|
||||
It is not clear if a single parser can accommodate the needs of the
|
||||
compiler and the IDE, but there is hope that it is possible. The RFC
|
||||
proposes to develop libsyntax2.0 as an experimental crates.io crate. If
|
||||
the experiment turns out to be a success, the second RFC will propose
|
||||
to integrate it with all existing tools and `rustc`.
|
||||
|
||||
Next, a syntax tree data structure is proposed for libsyntax2.0. It
|
||||
seems to have the following important properties:
|
||||
|
||||
* It is lossless and faithfully represents the original source code,
|
||||
including explicit nodes for comments and whitespace.
|
||||
|
||||
* It is flexible and allows encoding arbitrary node structure,
|
||||
even for invalid syntax.
|
||||
|
||||
* It is minimal: it stores a small amount of data and has no
|
||||
dependencies. For instance, it does not need compiler's string
|
||||
interner or literal data representation.
|
||||
|
||||
* While the tree itself is minimal, it is extensible in a sense that
|
||||
it is possible to associate arbitrary data with certain nodes in a
|
||||
type-safe way.
|
||||
|
||||
|
||||
It is not clear if this representation is the best one. It is heavily
|
||||
inspired by [PSI] data structure which is used in [IntelliJ] based IDEs
|
||||
and in the [Kotlin] compiler.
|
||||
|
||||
[PSI]: http://www.jetbrains.org/intellij/sdk/docs/reference_guide/custom_language_support/implementing_parser_and_psi.html
|
||||
[IntelliJ]: https://github.com/JetBrains/intellij-community/
|
||||
|
@ -351,6 +404,11 @@ impl<'f> AstNode<'f> for TypeRef<'f> {
|
|||
}
|
||||
```
|
||||
|
||||
Note that although AST wrappers provide a type-safe access to the
|
||||
tree, they are still represented as indexes, so clients of the syntax
|
||||
tree can easily associate additional data with AST nodes by storing
|
||||
it in a side-table.
|
||||
|
||||
|
||||
## Missing Source Code
|
||||
|
||||
|
@ -374,7 +432,8 @@ This RFC proposes huge changes to the internals of the compiler, so
|
|||
it's important to proceed carefully and incrementally. The following
|
||||
plan is suggested:
|
||||
|
||||
* RFC discussion about the theoretical feasibility of the proposal.
|
||||
* RFC discussion about the theoretical feasibility of the proposal,
|
||||
and the best representation for the syntax tree.
|
||||
|
||||
* Implementation of the proposal as a completely separate crates.io
|
||||
crate, by refactoring existing libsyntax source code to produce a
|
||||
|
@ -393,11 +452,11 @@ plan is suggested:
|
|||
- No harm will be done as long as the new libsyntax exists as an
|
||||
experiment on crates.io. However, actually using it in the compiler
|
||||
and other tools would require massive refactorings.
|
||||
|
||||
- Proposed syntax tree requires keeping the original source code
|
||||
available, which might increase memory usage of the
|
||||
compiler. However, it should be possible to throw the original tree
|
||||
and source code away after conversion to HIR.
|
||||
|
||||
- It's difficult to know upfront if the proposed syntax tree would
|
||||
actually work well in both the compiler and IDE. It may be possible
|
||||
that some drawbacks will be discovered during implementation.
|
||||
|
||||
|
||||
# Rationale and alternatives
|
||||
[alternatives]: #alternatives
|
||||
|
@ -422,14 +481,14 @@ plan is suggested:
|
|||
the source code? It seems like the answer is yes, because the
|
||||
language and especially macros were cleverly designed with this
|
||||
use-case in mind.
|
||||
|
||||
|
||||
|
||||
|
||||
- Is it possible to implement macro expansion using the proposed
|
||||
framework? This is the main question of this RFC. The proposed
|
||||
solution of synthesizing source code on the fly seems workable: it's
|
||||
not that different from the current implementation, which
|
||||
synthesizes token trees.
|
||||
|
||||
|
||||
|
||||
|
||||
- How to actually phase out current libsyntax, if libsyntax2.0 turns
|
||||
out to be a success?
|
||||
|
|
7
src/lib.rs
Normal file
7
src/lib.rs
Normal file
|
@ -0,0 +1,7 @@
|
|||
// Test module compiled only for test builds; it currently holds a single
// trivial arithmetic check and does not exercise any library code.
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
fn it_works() {
|
||||
assert_eq!(2 + 2, 4);
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue