Merge #1336
1336: Refactor SubtreeSource r=matklad a=edwin0cheng This PR simplifies `SubtreeSource` by removing `SubtreeWalk` and `Querier`, and walks only the top-level `TokenTree`s when collecting tokens from the source, by comparing two cursors directly. Co-authored-by: Edwin Cheng <edwin0cheng@gmail.com>
This commit is contained in:
commit
0545e4781d
4 changed files with 149 additions and 156 deletions
|
@ -2,16 +2,38 @@ use crate::subtree_source::SubtreeTokenSource;
|
|||
|
||||
use ra_parser::{TokenSource, TreeSink};
|
||||
use ra_syntax::{SyntaxKind};
|
||||
use tt::buffer::TokenBuffer;
|
||||
use tt::buffer::{TokenBuffer, Cursor};
|
||||
|
||||
struct OffsetTokenSink {
|
||||
token_pos: usize,
|
||||
struct OffsetTokenSink<'a> {
|
||||
cursor: Cursor<'a>,
|
||||
error: bool,
|
||||
}
|
||||
|
||||
impl TreeSink for OffsetTokenSink {
|
||||
impl<'a> OffsetTokenSink<'a> {
|
||||
pub fn collect(&self, begin: Cursor<'a>) -> Vec<tt::TokenTree> {
|
||||
if !self.cursor.is_root() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let mut curr = begin;
|
||||
let mut res = vec![];
|
||||
|
||||
while self.cursor != curr {
|
||||
if let Some(token) = curr.token_tree() {
|
||||
res.push(token);
|
||||
}
|
||||
curr = curr.bump();
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TreeSink for OffsetTokenSink<'a> {
|
||||
fn token(&mut self, _kind: SyntaxKind, n_tokens: u8) {
|
||||
self.token_pos += n_tokens as usize;
|
||||
for _ in 0..n_tokens {
|
||||
self.cursor = self.cursor.bump_subtree();
|
||||
}
|
||||
}
|
||||
fn start_node(&mut self, _kind: SyntaxKind) {}
|
||||
fn finish_node(&mut self) {}
|
||||
|
@ -72,23 +94,21 @@ impl<'a> Parser<'a> {
|
|||
{
|
||||
let buffer = TokenBuffer::new(&self.subtree.token_trees[*self.cur_pos..]);
|
||||
let mut src = SubtreeTokenSource::new(&buffer);
|
||||
let mut sink = OffsetTokenSink { token_pos: 0, error: false };
|
||||
let mut sink = OffsetTokenSink { cursor: buffer.begin(), error: false };
|
||||
|
||||
f(&mut src, &mut sink);
|
||||
|
||||
let r = self.finish(sink.token_pos, &mut src);
|
||||
let r = self.finish(buffer.begin(), &mut sink);
|
||||
if sink.error {
|
||||
return None;
|
||||
}
|
||||
r
|
||||
}
|
||||
|
||||
fn finish(self, parsed_token: usize, src: &mut SubtreeTokenSource) -> Option<tt::TokenTree> {
|
||||
let res = src.bump_n(parsed_token);
|
||||
fn finish(self, begin: Cursor, sink: &mut OffsetTokenSink) -> Option<tt::TokenTree> {
|
||||
let res = sink.collect(begin);
|
||||
*self.cur_pos += res.len();
|
||||
|
||||
let res: Vec<_> = res.into_iter().collect();
|
||||
|
||||
match res.len() {
|
||||
0 => None,
|
||||
1 => Some(res[0].clone()),
|
||||
|
|
|
@ -1,13 +1,8 @@
|
|||
use ra_parser::{TokenSource, Token};
|
||||
use ra_syntax::{classify_literal, SmolStr, SyntaxKind, SyntaxKind::*, T};
|
||||
use std::cell::{RefCell, Cell};
|
||||
use std::sync::Arc;
|
||||
use tt::buffer::{TokenBuffer, Cursor};
|
||||
|
||||
pub(crate) trait Querier {
|
||||
fn token(&self, uidx: usize) -> (SyntaxKind, SmolStr, bool);
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
struct TtToken {
|
||||
pub kind: SyntaxKind,
|
||||
|
@ -15,20 +10,40 @@ struct TtToken {
|
|||
pub text: SmolStr,
|
||||
}
|
||||
|
||||
// A wrapper class for ref cell
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct SubtreeWalk<'a> {
|
||||
start: Cursor<'a>,
|
||||
cursor: Cell<Cursor<'a>>,
|
||||
pub(crate) struct SubtreeTokenSource<'a> {
|
||||
cached_cursor: Cell<Cursor<'a>>,
|
||||
cached: RefCell<Vec<Option<TtToken>>>,
|
||||
curr: (Token, usize),
|
||||
}
|
||||
|
||||
impl<'a> SubtreeWalk<'a> {
|
||||
fn new(cursor: Cursor<'a>) -> Self {
|
||||
SubtreeWalk {
|
||||
start: cursor,
|
||||
cursor: Cell::new(cursor),
|
||||
impl<'a> SubtreeTokenSource<'a> {
|
||||
// Helper function used in test
|
||||
#[cfg(test)]
|
||||
pub fn text(&self) -> SmolStr {
|
||||
match self.get(self.curr.1) {
|
||||
Some(tt) => tt.text,
|
||||
_ => SmolStr::new(""),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> SubtreeTokenSource<'a> {
|
||||
pub fn new(buffer: &'a TokenBuffer) -> SubtreeTokenSource<'a> {
|
||||
let cursor = buffer.begin();
|
||||
|
||||
let mut res = SubtreeTokenSource {
|
||||
curr: (Token { kind: EOF, is_jointed_to_next: false }, 0),
|
||||
cached_cursor: Cell::new(cursor),
|
||||
cached: RefCell::new(Vec::with_capacity(10)),
|
||||
};
|
||||
res.curr = (res.mk_token(0), 0);
|
||||
res
|
||||
}
|
||||
|
||||
fn mk_token(&self, pos: usize) -> Token {
|
||||
match self.get(pos) {
|
||||
Some(tt) => Token { kind: tt.kind, is_jointed_to_next: tt.is_joint_to_next },
|
||||
None => Token { kind: EOF, is_jointed_to_next: false },
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -39,7 +54,7 @@ impl<'a> SubtreeWalk<'a> {
|
|||
}
|
||||
|
||||
while pos >= cached.len() {
|
||||
let cursor = self.cursor.get();
|
||||
let cursor = self.cached_cursor.get();
|
||||
if cursor.eof() {
|
||||
cached.push(None);
|
||||
continue;
|
||||
|
@ -48,16 +63,16 @@ impl<'a> SubtreeWalk<'a> {
|
|||
match cursor.token_tree() {
|
||||
Some(tt::TokenTree::Leaf(leaf)) => {
|
||||
cached.push(Some(convert_leaf(&leaf)));
|
||||
self.cursor.set(cursor.bump());
|
||||
self.cached_cursor.set(cursor.bump());
|
||||
}
|
||||
Some(tt::TokenTree::Subtree(subtree)) => {
|
||||
self.cursor.set(cursor.subtree().unwrap());
|
||||
self.cached_cursor.set(cursor.subtree().unwrap());
|
||||
cached.push(Some(convert_delim(subtree.delimiter, false)));
|
||||
}
|
||||
None => {
|
||||
if let Some(subtree) = cursor.end() {
|
||||
cached.push(Some(convert_delim(subtree.delimiter, true)));
|
||||
self.cursor.set(cursor.bump());
|
||||
self.cached_cursor.set(cursor.bump());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -65,88 +80,6 @@ impl<'a> SubtreeWalk<'a> {
|
|||
|
||||
return cached[pos].clone();
|
||||
}
|
||||
|
||||
fn collect_token_trees(&self, n: usize) -> Vec<tt::TokenTree> {
|
||||
let mut res = vec![];
|
||||
|
||||
let mut pos = 0;
|
||||
let mut cursor = self.start;
|
||||
let mut level = 0;
|
||||
|
||||
while pos < n {
|
||||
if cursor.eof() {
|
||||
break;
|
||||
}
|
||||
|
||||
match cursor.token_tree() {
|
||||
Some(tt::TokenTree::Leaf(leaf)) => {
|
||||
if level == 0 {
|
||||
res.push(leaf.into());
|
||||
}
|
||||
cursor = cursor.bump();
|
||||
pos += 1;
|
||||
}
|
||||
Some(tt::TokenTree::Subtree(subtree)) => {
|
||||
if level == 0 {
|
||||
res.push(subtree.into());
|
||||
}
|
||||
pos += 1;
|
||||
level += 1;
|
||||
cursor = cursor.subtree().unwrap();
|
||||
}
|
||||
|
||||
None => {
|
||||
if let Some(_) = cursor.end() {
|
||||
level -= 1;
|
||||
pos += 1;
|
||||
cursor = cursor.bump();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Querier for SubtreeWalk<'a> {
|
||||
fn token(&self, uidx: usize) -> (SyntaxKind, SmolStr, bool) {
|
||||
self.get(uidx)
|
||||
.map(|tkn| (tkn.kind, tkn.text, tkn.is_joint_to_next))
|
||||
.unwrap_or_else(|| (SyntaxKind::EOF, "".into(), false))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct SubtreeTokenSource<'a> {
|
||||
walker: Arc<SubtreeWalk<'a>>,
|
||||
curr: (Token, usize),
|
||||
}
|
||||
|
||||
impl<'a> SubtreeTokenSource<'a> {
|
||||
pub fn new(buffer: &'a TokenBuffer) -> SubtreeTokenSource<'a> {
|
||||
let mut res = SubtreeTokenSource {
|
||||
walker: Arc::new(SubtreeWalk::new(buffer.begin())),
|
||||
curr: (Token { kind: EOF, is_jointed_to_next: false }, 0),
|
||||
};
|
||||
res.curr = (res.mk_token(0), 0);
|
||||
res
|
||||
}
|
||||
|
||||
pub fn querier(&self) -> Arc<SubtreeWalk<'a>> {
|
||||
self.walker.clone()
|
||||
}
|
||||
|
||||
pub(crate) fn bump_n(&mut self, parsed_tokens: usize) -> Vec<tt::TokenTree> {
|
||||
let res = self.walker.collect_token_trees(parsed_tokens);
|
||||
res
|
||||
}
|
||||
|
||||
fn mk_token(&self, pos: usize) -> Token {
|
||||
match self.walker.get(pos) {
|
||||
Some(tt) => Token { kind: tt.kind, is_jointed_to_next: tt.is_joint_to_next },
|
||||
None => Token { kind: EOF, is_jointed_to_next: false },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TokenSource for SubtreeTokenSource<'a> {
|
||||
|
@ -165,12 +98,12 @@ impl<'a> TokenSource for SubtreeTokenSource<'a> {
|
|||
return;
|
||||
}
|
||||
|
||||
self.curr = (self.mk_token(self.curr.1 + 1), self.curr.1 + 1)
|
||||
self.curr = (self.mk_token(self.curr.1 + 1), self.curr.1 + 1);
|
||||
}
|
||||
|
||||
/// Is the current token a specified keyword?
|
||||
fn is_keyword(&self, kw: &str) -> bool {
|
||||
match self.walker.get(self.curr.1) {
|
||||
match self.get(self.curr.1) {
|
||||
Some(t) => t.text == *kw,
|
||||
_ => false,
|
||||
}
|
||||
|
|
|
@ -3,8 +3,8 @@ use ra_syntax::{
|
|||
AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc, SyntaxElement,
|
||||
ast, SyntaxKind::*, TextUnit, T
|
||||
};
|
||||
|
||||
use crate::subtree_source::{SubtreeTokenSource, Querier};
|
||||
use tt::buffer::{TokenBuffer, Cursor};
|
||||
use crate::subtree_source::{SubtreeTokenSource};
|
||||
use crate::ExpandError;
|
||||
|
||||
/// Maps `tt::TokenId` to the relative range of the original token.
|
||||
|
@ -49,10 +49,9 @@ fn token_tree_to_syntax_node<F>(tt: &tt::Subtree, f: F) -> Result<TreeArc<Syntax
|
|||
where
|
||||
F: Fn(&mut ra_parser::TokenSource, &mut ra_parser::TreeSink),
|
||||
{
|
||||
let buffer = tt::buffer::TokenBuffer::new(&[tt.clone().into()]);
|
||||
let buffer = TokenBuffer::new(&[tt.clone().into()]);
|
||||
let mut token_source = SubtreeTokenSource::new(&buffer);
|
||||
let querier = token_source.querier();
|
||||
let mut tree_sink = TtTreeSink::new(querier.as_ref());
|
||||
let mut tree_sink = TtTreeSink::new(buffer.begin());
|
||||
f(&mut token_source, &mut tree_sink);
|
||||
if tree_sink.roots.len() != 1 {
|
||||
return Err(ExpandError::ConversionError);
|
||||
|
@ -259,11 +258,10 @@ fn convert_tt(
|
|||
Some(res)
|
||||
}
|
||||
|
||||
struct TtTreeSink<'a, Q: Querier> {
|
||||
struct TtTreeSink<'a> {
|
||||
buf: String,
|
||||
src_querier: &'a Q,
|
||||
cursor: Cursor<'a>,
|
||||
text_pos: TextUnit,
|
||||
token_pos: usize,
|
||||
inner: SyntaxTreeBuilder,
|
||||
|
||||
// Number of roots
|
||||
|
@ -271,52 +269,75 @@ struct TtTreeSink<'a, Q: Querier> {
|
|||
roots: smallvec::SmallVec<[usize; 1]>,
|
||||
}
|
||||
|
||||
impl<'a, Q: Querier> TtTreeSink<'a, Q> {
|
||||
fn new(src_querier: &'a Q) -> Self {
|
||||
impl<'a> TtTreeSink<'a> {
|
||||
fn new(cursor: Cursor<'a>) -> Self {
|
||||
TtTreeSink {
|
||||
buf: String::new(),
|
||||
src_querier,
|
||||
cursor,
|
||||
text_pos: 0.into(),
|
||||
token_pos: 0,
|
||||
inner: SyntaxTreeBuilder::default(),
|
||||
roots: smallvec::SmallVec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_delimiter(kind: SyntaxKind) -> bool {
|
||||
match kind {
|
||||
T!['('] | T!['['] | T!['{'] | T![')'] | T![']'] | T!['}'] => true,
|
||||
_ => false,
|
||||
}
|
||||
fn delim_to_str(d: tt::Delimiter, closing: bool) -> SmolStr {
|
||||
let texts = match d {
|
||||
tt::Delimiter::Parenthesis => "()",
|
||||
tt::Delimiter::Brace => "{}",
|
||||
tt::Delimiter::Bracket => "[]",
|
||||
tt::Delimiter::None => "",
|
||||
};
|
||||
|
||||
let idx = closing as usize;
|
||||
let text = if texts.len() > 0 { &texts[idx..texts.len() - (1 - idx)] } else { "" };
|
||||
text.into()
|
||||
}
|
||||
|
||||
impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> {
|
||||
impl<'a> TreeSink for TtTreeSink<'a> {
|
||||
fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
|
||||
if kind == L_DOLLAR || kind == R_DOLLAR {
|
||||
self.token_pos += n_tokens as usize;
|
||||
self.cursor = self.cursor.bump_subtree();
|
||||
return;
|
||||
}
|
||||
|
||||
for _ in 0..n_tokens {
|
||||
self.buf += &self.src_querier.token(self.token_pos).1;
|
||||
self.token_pos += 1;
|
||||
if self.cursor.eof() {
|
||||
break;
|
||||
}
|
||||
|
||||
match self.cursor.token_tree() {
|
||||
Some(tt::TokenTree::Leaf(leaf)) => {
|
||||
self.cursor = self.cursor.bump();
|
||||
self.buf += &format!("{}", leaf);
|
||||
}
|
||||
Some(tt::TokenTree::Subtree(subtree)) => {
|
||||
self.cursor = self.cursor.subtree().unwrap();
|
||||
self.buf += &delim_to_str(subtree.delimiter, false);
|
||||
}
|
||||
None => {
|
||||
if let Some(parent) = self.cursor.end() {
|
||||
self.cursor = self.cursor.bump();
|
||||
self.buf += &delim_to_str(parent.delimiter, true);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
self.text_pos += TextUnit::of_str(&self.buf);
|
||||
let text = SmolStr::new(self.buf.as_str());
|
||||
self.buf.clear();
|
||||
self.inner.token(kind, text);
|
||||
|
||||
// Add a white space between tokens, only if both are not delimiters
|
||||
if !is_delimiter(kind) {
|
||||
let (last_kind, _, last_joint_to_next) = self.src_querier.token(self.token_pos - 1);
|
||||
if !last_joint_to_next && last_kind.is_punct() {
|
||||
let (cur_kind, _, _) = self.src_querier.token(self.token_pos);
|
||||
if !is_delimiter(cur_kind) {
|
||||
if cur_kind.is_punct() {
|
||||
self.inner.token(WHITESPACE, " ".into());
|
||||
}
|
||||
}
|
||||
// Add whitespace between adjacent puncts
|
||||
let next = self.cursor.bump();
|
||||
if let (
|
||||
Some(tt::TokenTree::Leaf(tt::Leaf::Punct(curr))),
|
||||
Some(tt::TokenTree::Leaf(tt::Leaf::Punct(_))),
|
||||
) = (self.cursor.token_tree(), next.token_tree())
|
||||
{
|
||||
if curr.spacing == tt::Spacing::Alone {
|
||||
self.inner.token(WHITESPACE, " ".into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -344,6 +365,7 @@ impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> {
|
|||
mod tests {
|
||||
use super::*;
|
||||
use crate::tests::{expand, create_rules};
|
||||
use ra_parser::TokenSource;
|
||||
|
||||
#[test]
|
||||
fn convert_tt_token_source() {
|
||||
|
@ -363,24 +385,27 @@ mod tests {
|
|||
);
|
||||
let expansion = expand(&rules, "literals!(foo)");
|
||||
let buffer = tt::buffer::TokenBuffer::new(&[expansion.clone().into()]);
|
||||
let tt_src = SubtreeTokenSource::new(&buffer);
|
||||
|
||||
let query = tt_src.querier();
|
||||
let mut tt_src = SubtreeTokenSource::new(&buffer);
|
||||
let mut tokens = vec![];
|
||||
while tt_src.current().kind != EOF {
|
||||
tokens.push((tt_src.current().kind, tt_src.text()));
|
||||
tt_src.bump();
|
||||
}
|
||||
|
||||
// [${]
|
||||
// [let] [a] [=] ['c'] [;]
|
||||
assert_eq!(query.token(2 + 3).1, "'c'");
|
||||
assert_eq!(query.token(2 + 3).0, CHAR);
|
||||
assert_eq!(tokens[2 + 3].1, "'c'");
|
||||
assert_eq!(tokens[2 + 3].0, CHAR);
|
||||
// [let] [c] [=] [1000] [;]
|
||||
assert_eq!(query.token(2 + 5 + 3).1, "1000");
|
||||
assert_eq!(query.token(2 + 5 + 3).0, INT_NUMBER);
|
||||
assert_eq!(tokens[2 + 5 + 3].1, "1000");
|
||||
assert_eq!(tokens[2 + 5 + 3].0, INT_NUMBER);
|
||||
// [let] [f] [=] [12E+99_f64] [;]
|
||||
assert_eq!(query.token(2 + 10 + 3).1, "12E+99_f64");
|
||||
assert_eq!(query.token(2 + 10 + 3).0, FLOAT_NUMBER);
|
||||
assert_eq!(tokens[2 + 10 + 3].1, "12E+99_f64");
|
||||
assert_eq!(tokens[2 + 10 + 3].0, FLOAT_NUMBER);
|
||||
|
||||
// [let] [s] [=] ["rust1"] [;]
|
||||
assert_eq!(query.token(2 + 15 + 3).1, "\"rust1\"");
|
||||
assert_eq!(query.token(2 + 15 + 3).0, STRING);
|
||||
assert_eq!(tokens[2 + 15 + 3].1, "\"rust1\"");
|
||||
assert_eq!(tokens[2 + 15 + 3].0, STRING);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -166,4 +166,19 @@ impl<'a> Cursor<'a> {
|
|||
Cursor::create(self.buffer, EntryPtr(self.ptr.0, self.ptr.1 + 1))
|
||||
}
|
||||
}
|
||||
|
||||
/// Bump the cursor; if it points at a subtree, return
|
||||
/// a cursor into that subtree instead.
|
||||
pub fn bump_subtree(self) -> Cursor<'a> {
|
||||
match self.entry() {
|
||||
Some(Entry::Subtree(_, _)) => self.subtree().unwrap(),
|
||||
_ => self.bump(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether the cursor is at the top level
|
||||
pub fn is_root(&self) -> bool {
|
||||
let entry_id = self.ptr.0;
|
||||
return entry_id.0 == 0;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue