From 3f5fc05d6675394f46b7cdeb6803d7c7045e16ab Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 29 Dec 2021 18:23:34 +0300 Subject: [PATCH 1/7] internal: add tests for extra parser entry points --- crates/parser/src/output.rs | 1 + crates/parser/src/shortcuts.rs | 4 ++- crates/parser/src/tests.rs | 1 + crates/parser/src/tests/entries.rs | 41 ++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 crates/parser/src/tests/entries.rs diff --git a/crates/parser/src/output.rs b/crates/parser/src/output.rs index b613df029f8..e9ec9822d68 100644 --- a/crates/parser/src/output.rs +++ b/crates/parser/src/output.rs @@ -22,6 +22,7 @@ pub struct Output { error: Vec<String>, } +#[derive(Debug)] pub enum Step<'a> { Token { kind: SyntaxKind, n_input_tokens: u8 }, Enter { kind: SyntaxKind }, diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index 3d28f814c9f..e14526aa733 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -16,6 +16,7 @@ use crate::{ SyntaxKind::{self, *}, }; +#[derive(Debug)] pub enum StrStep<'a> { Token { kind: SyntaxKind, text: &'a str }, Enter { kind: SyntaxKind }, @@ -75,7 +76,8 @@ impl<'a> LexedStr<'a> { builder.eat_trivias(); (builder.sink)(StrStep::Exit); } - State::PendingEnter | State::Normal => unreachable!(), + State::PendingEnter => (), + State::Normal => unreachable!(), } let is_eof = builder.pos == builder.lexed.len(); diff --git a/crates/parser/src/tests.rs b/crates/parser/src/tests.rs index 512f7ddb95b..fb4885e98d5 100644 --- a/crates/parser/src/tests.rs +++ b/crates/parser/src/tests.rs @@ -1,4 +1,5 @@ mod sourcegen_inline_tests; +mod entries; use std::{ fmt::Write, diff --git a/crates/parser/src/tests/entries.rs b/crates/parser/src/tests/entries.rs new file mode 100644 index 00000000000..93e8136263e --- /dev/null +++ b/crates/parser/src/tests/entries.rs @@ -0,0 +1,41 @@ +use crate::{LexedStr, PrefixEntryPoint, 
StrStep}; + +#[test] +fn vis() { + check_prefix(PrefixEntryPoint::Vis, "pub(crate) fn foo() {}", "pub(crate)"); + check_prefix(PrefixEntryPoint::Vis, "fn foo() {}", ""); + check_prefix(PrefixEntryPoint::Vis, "pub(fn foo() {}", "pub"); + check_prefix(PrefixEntryPoint::Vis, "pub(crate fn foo() {}", "pub(crate"); + check_prefix(PrefixEntryPoint::Vis, "crate fn foo() {}", "crate"); +} + +#[test] +fn block() { + check_prefix(PrefixEntryPoint::Block, "{}, 92", "{}"); + check_prefix(PrefixEntryPoint::Block, "{, 92)", "{, 92)"); + check_prefix(PrefixEntryPoint::Block, "()", ""); +} + +#[test] +fn stmt() { + check_prefix(PrefixEntryPoint::Stmt, "92; fn", "92"); + check_prefix(PrefixEntryPoint::Stmt, "let _ = 92; 1", "let _ = 92"); + check_prefix(PrefixEntryPoint::Stmt, "pub fn f() {} = 92", "pub fn f() {}"); + check_prefix(PrefixEntryPoint::Stmt, ";;;", ";"); + check_prefix(PrefixEntryPoint::Stmt, "+", "+"); + check_prefix(PrefixEntryPoint::Stmt, "@", "@"); + check_prefix(PrefixEntryPoint::Stmt, "loop {} - 1", "loop {}"); +} + +fn check_prefix(entry: PrefixEntryPoint, input: &str, prefix: &str) { + let lexed = LexedStr::new(input); + let input = lexed.to_input(); + let output = entry.parse(&input); + + let mut buf = String::new(); + lexed.intersperse_trivia(&output, &mut |step| match step { + StrStep::Token { kind: _, text } => buf.push_str(text), + _ => (), + }); + assert_eq!(buf.trim(), prefix) +} From ad4b02ecc00977538cf7225a8b47cfc76d386221 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 29 Dec 2021 18:37:06 +0300 Subject: [PATCH 2/7] add automated semicolon insertion test --- .../hir_def/src/macro_expansion_tests/mbe.rs | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/crates/hir_def/src/macro_expansion_tests/mbe.rs b/crates/hir_def/src/macro_expansion_tests/mbe.rs index 466c85fc5b0..4bef508840d 100644 --- a/crates/hir_def/src/macro_expansion_tests/mbe.rs +++ b/crates/hir_def/src/macro_expansion_tests/mbe.rs @@ 
-299,6 +299,34 @@ fn baz() { ) } +#[test] +fn asi() { + // Thanks, Christopher! + // + // https://internals.rust-lang.org/t/understanding-decisions-behind-semicolons/15181/29 + check( + r#" +macro_rules! asi { ($($stmt:stmt)*) => ($($stmt)*); } + +fn main() { + asi! { + let a = 2 + let b = 5 + drop(b-a) + println!("{}", a+b) + } +} +"#, + expect![[r#" +macro_rules! asi { ($($stmt:stmt)*) => ($($stmt)*); } + +fn main() { + let a = 2let b = 5drop(b-a)println!("{}", a+b) +} +"#]], + ) +} + #[test] fn test_match_group_empty_fixed_token() { check( From 8234a85d158ec83581d970a4a669f97b07c56c2f Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 29 Dec 2021 18:51:05 +0300 Subject: [PATCH 3/7] compress --- crates/parser/src/grammar.rs | 4 ++-- crates/parser/src/grammar/expressions.rs | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/parser/src/grammar.rs b/crates/parser/src/grammar.rs index 42426a1df28..234e584eeb7 100644 --- a/crates/parser/src/grammar.rs +++ b/crates/parser/src/grammar.rs @@ -59,7 +59,7 @@ pub(crate) mod entry { } pub(crate) fn stmt(p: &mut Parser) { - expressions::stmt(p, expressions::StmtWithSemi::No, true); + expressions::stmt(p, expressions::StmtWithSemi::No); } pub(crate) fn pat(p: &mut Parser) { @@ -103,7 +103,7 @@ pub(crate) mod entry { continue; } - expressions::stmt(p, expressions::StmtWithSemi::Optional, true); + expressions::stmt(p, expressions::StmtWithSemi::Optional); } m.complete(p, MACRO_STMTS); diff --git a/crates/parser/src/grammar/expressions.rs b/crates/parser/src/grammar/expressions.rs index 64057a4a674..3238b6e9f44 100644 --- a/crates/parser/src/grammar/expressions.rs +++ b/crates/parser/src/grammar/expressions.rs @@ -5,6 +5,7 @@ use super::*; pub(crate) use self::atom::{block_expr, match_arm_list}; pub(super) use self::atom::{literal, LITERAL_FIRST}; +#[derive(PartialEq, Eq)] pub(super) enum StmtWithSemi { Yes, No, @@ -28,7 +29,7 @@ fn expr_no_struct(p: &mut Parser) { 
expr_bp(p, None, r, 1); } -pub(super) fn stmt(p: &mut Parser, with_semi: StmtWithSemi, prefer_expr: bool) { +pub(super) fn stmt(p: &mut Parser, with_semi: StmtWithSemi) { let m = p.start(); // test attr_on_expr_stmt // fn foo() { @@ -52,7 +53,7 @@ pub(super) fn stmt(p: &mut Parser, with_semi: StmtWithSemi, prefer_expr: bool) { }; if let Some((cm, blocklike)) = expr_stmt(p, Some(m)) { - if !(p.at(T!['}']) || (prefer_expr && p.at(EOF))) { + if !(p.at(T!['}']) || (with_semi != StmtWithSemi::Yes && p.at(EOF))) { // test no_semi_after_block // fn foo() { // if true {} @@ -149,7 +150,7 @@ pub(super) fn expr_block_contents(p: &mut Parser) { continue; } - stmt(p, StmtWithSemi::Yes, false); + stmt(p, StmtWithSemi::Yes); } } From f5cfc0504e30b2e87dc9405fc3aeaee8e2fa6b08 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 29 Dec 2021 19:18:34 +0300 Subject: [PATCH 4/7] rename --- crates/parser/src/grammar.rs | 4 +-- crates/parser/src/grammar/expressions.rs | 35 ++++++++++++------------ 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/crates/parser/src/grammar.rs b/crates/parser/src/grammar.rs index 234e584eeb7..b704242065c 100644 --- a/crates/parser/src/grammar.rs +++ b/crates/parser/src/grammar.rs @@ -59,7 +59,7 @@ pub(crate) mod entry { } pub(crate) fn stmt(p: &mut Parser) { - expressions::stmt(p, expressions::StmtWithSemi::No); + expressions::stmt(p, expressions::Semicolon::Forbidden); } pub(crate) fn pat(p: &mut Parser) { @@ -103,7 +103,7 @@ pub(crate) mod entry { continue; } - expressions::stmt(p, expressions::StmtWithSemi::Optional); + expressions::stmt(p, expressions::Semicolon::Optional); } m.complete(p, MACRO_STMTS); diff --git a/crates/parser/src/grammar/expressions.rs b/crates/parser/src/grammar/expressions.rs index 3238b6e9f44..c585fdb0967 100644 --- a/crates/parser/src/grammar/expressions.rs +++ b/crates/parser/src/grammar/expressions.rs @@ -6,10 +6,10 @@ pub(crate) use self::atom::{block_expr, match_arm_list}; 
pub(super) use self::atom::{literal, LITERAL_FIRST}; #[derive(PartialEq, Eq)] -pub(super) enum StmtWithSemi { - Yes, - No, +pub(super) enum Semicolon { + Required, Optional, + Forbidden, } const EXPR_FIRST: TokenSet = LHS_FIRST; @@ -29,7 +29,7 @@ fn expr_no_struct(p: &mut Parser) { expr_bp(p, None, r, 1); } -pub(super) fn stmt(p: &mut Parser, with_semi: StmtWithSemi) { +pub(super) fn stmt(p: &mut Parser, semicolon: Semicolon) { let m = p.start(); // test attr_on_expr_stmt // fn foo() { @@ -41,7 +41,7 @@ pub(super) fn stmt(p: &mut Parser, with_semi: StmtWithSemi) { attributes::outer_attrs(p); if p.at(T![let]) { - let_stmt(p, m, with_semi); + let_stmt(p, m, semicolon); return; } @@ -53,7 +53,7 @@ pub(super) fn stmt(p: &mut Parser, with_semi: StmtWithSemi) { }; if let Some((cm, blocklike)) = expr_stmt(p, Some(m)) { - if !(p.at(T!['}']) || (with_semi != StmtWithSemi::Yes && p.at(EOF))) { + if !(p.at(T!['}']) || (semicolon != Semicolon::Required && p.at(EOF))) { // test no_semi_after_block // fn foo() { // if true {} @@ -69,27 +69,26 @@ pub(super) fn stmt(p: &mut Parser, with_semi: StmtWithSemi) { // test!{} // } let m = cm.precede(p); - match with_semi { - StmtWithSemi::No => (), - StmtWithSemi::Optional => { - p.eat(T![;]); - } - StmtWithSemi::Yes => { + match semicolon { + Semicolon::Required => { if blocklike.is_block() { p.eat(T![;]); } else { p.expect(T![;]); } } + Semicolon::Optional => { + p.eat(T![;]); + } + Semicolon::Forbidden => (), } - m.complete(p, EXPR_STMT); } } // test let_stmt // fn f() { let x: i32 = 92; } - fn let_stmt(p: &mut Parser, m: Marker, with_semi: StmtWithSemi) { + fn let_stmt(p: &mut Parser, m: Marker, with_semi: Semicolon) { p.bump(T![let]); patterns::pattern(p); if p.at(T![:]) { @@ -114,11 +113,11 @@ pub(super) fn stmt(p: &mut Parser, with_semi: StmtWithSemi) { } match with_semi { - StmtWithSemi::No => (), - StmtWithSemi::Optional => { + Semicolon::Forbidden => (), + Semicolon::Optional => { p.eat(T![;]); } - StmtWithSemi::Yes => { + 
Semicolon::Required => { p.expect(T![;]); } } @@ -150,7 +149,7 @@ pub(super) fn expr_block_contents(p: &mut Parser) { continue; } - stmt(p, StmtWithSemi::Yes); + stmt(p, Semicolon::Required); } } From 841cd30b451ee80c54c47e9b0c6ea42ae2de4795 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 29 Dec 2021 19:37:35 +0300 Subject: [PATCH 5/7] Add test --- .../hir_def/src/macro_expansion_tests/mbe.rs | 28 ----------- .../src/macro_expansion_tests/mbe/matching.rs | 49 +++++++++++++++++++ 2 files changed, 49 insertions(+), 28 deletions(-) diff --git a/crates/hir_def/src/macro_expansion_tests/mbe.rs b/crates/hir_def/src/macro_expansion_tests/mbe.rs index 4bef508840d..466c85fc5b0 100644 --- a/crates/hir_def/src/macro_expansion_tests/mbe.rs +++ b/crates/hir_def/src/macro_expansion_tests/mbe.rs @@ -299,34 +299,6 @@ fn baz() { ) } -#[test] -fn asi() { - // Thanks, Christopher! - // - // https://internals.rust-lang.org/t/understanding-decisions-behind-semicolons/15181/29 - check( - r#" -macro_rules! asi { ($($stmt:stmt)*) => ($($stmt)*); } - -fn main() { - asi! { - let a = 2 - let b = 5 - drop(b-a) - println!("{}", a+b) - } -} -"#, - expect![[r#" -macro_rules! asi { ($($stmt:stmt)*) => ($($stmt)*); } - -fn main() { - let a = 2let b = 5drop(b-a)println!("{}", a+b) -} -"#]], - ) -} - #[test] fn test_match_group_empty_fixed_token() { check( diff --git a/crates/hir_def/src/macro_expansion_tests/mbe/matching.rs b/crates/hir_def/src/macro_expansion_tests/mbe/matching.rs index 9fb6d96b725..83e8937f5b1 100644 --- a/crates/hir_def/src/macro_expansion_tests/mbe/matching.rs +++ b/crates/hir_def/src/macro_expansion_tests/mbe/matching.rs @@ -50,3 +50,52 @@ macro_rules! m{ ($fmt:expr) => (); } "#]], ); } + +#[test] +fn asi() { + // Thanks, Christopher! + // + // https://internals.rust-lang.org/t/understanding-decisions-behind-semicolons/15181/29 + check( + r#" +macro_rules! asi { ($($stmt:stmt)*) => ($($stmt)*); } + +fn main() { + asi! 
{ + let a = 2 + let b = 5 + drop(b-a) + println!("{}", a+b) + } +} +"#, + expect![[r#" +macro_rules! asi { ($($stmt:stmt)*) => ($($stmt)*); } + +fn main() { + let a = 2let b = 5drop(b-a)println!("{}", a+b) +} +"#]], + ) +} + +#[test] +fn stmt_boundaries() { + // FIXME: this actually works OK under rustc. + check( + r#" +macro_rules! m { + ($($s:stmt)*) => (stringify!($($s |)*)) +} +// +errors +m!(;;92;let x = 92; loop {};); +"#, + expect![[r#" +macro_rules! m { + ($($s:stmt)*) => (stringify!($($s |)*)) +} +/* error: expected Stmt *//* parse error: expected SEMICOLON */ +stringify!() +"#]], + ); +} From b5369927d79d1f95b9748ded2523a65b72f05a1d Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 29 Dec 2021 20:04:36 +0300 Subject: [PATCH 6/7] parse empty statement as statement --- .../src/macro_expansion_tests/mbe/matching.rs | 14 +++++++++----- crates/parser/src/grammar.rs | 5 ----- crates/parser/src/grammar/expressions.rs | 10 ++++------ crates/parser/src/shortcuts.rs | 6 ++---- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/crates/hir_def/src/macro_expansion_tests/mbe/matching.rs b/crates/hir_def/src/macro_expansion_tests/mbe/matching.rs index 83e8937f5b1..b93072d4466 100644 --- a/crates/hir_def/src/macro_expansion_tests/mbe/matching.rs +++ b/crates/hir_def/src/macro_expansion_tests/mbe/matching.rs @@ -85,17 +85,21 @@ fn stmt_boundaries() { check( r#" macro_rules! m { - ($($s:stmt)*) => (stringify!($($s |)*)) + ($($s:stmt)*) => (stringify!($($s |)*);) } -// +errors m!(;;92;let x = 92; loop {};); "#, expect![[r#" macro_rules! 
m { - ($($s:stmt)*) => (stringify!($($s |)*)) + ($($s:stmt)*) => (stringify!($($s |)*);) } -/* error: expected Stmt *//* parse error: expected SEMICOLON */ -stringify!() +stringify!(; +|; +|92|; +|let x = 92|; +|loop {} +|; +|); "#]], ); } diff --git a/crates/parser/src/grammar.rs b/crates/parser/src/grammar.rs index b704242065c..e1a265d817c 100644 --- a/crates/parser/src/grammar.rs +++ b/crates/parser/src/grammar.rs @@ -98,11 +98,6 @@ pub(crate) mod entry { let m = p.start(); while !p.at(EOF) { - if p.at(T![;]) { - p.bump(T![;]); - continue; - } - expressions::stmt(p, expressions::Semicolon::Optional); } diff --git a/crates/parser/src/grammar/expressions.rs b/crates/parser/src/grammar/expressions.rs index c585fdb0967..9dbba89c568 100644 --- a/crates/parser/src/grammar/expressions.rs +++ b/crates/parser/src/grammar/expressions.rs @@ -30,6 +30,10 @@ fn expr_no_struct(p: &mut Parser) { } pub(super) fn stmt(p: &mut Parser, semicolon: Semicolon) { + if p.eat(T![;]) { + return; + } + let m = p.start(); // test attr_on_expr_stmt // fn foo() { @@ -143,12 +147,6 @@ pub(super) fn expr_block_contents(p: &mut Parser) { // fn f() {}; // struct S {}; // } - - if p.at(T![;]) { - p.bump(T![;]); - continue; - } - stmt(p, Semicolon::Required); } } diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index e14526aa733..7040608a9f7 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -76,8 +76,7 @@ impl<'a> LexedStr<'a> { builder.eat_trivias(); (builder.sink)(StrStep::Exit); } - State::PendingEnter => (), - State::Normal => unreachable!(), + State::PendingEnter | State::Normal => (), } let is_eof = builder.pos == builder.lexed.len(); @@ -101,9 +100,8 @@ enum State { impl Builder<'_, '_> { fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { match mem::replace(&mut self.state, State::Normal) { - State::PendingEnter => unreachable!(), State::PendingExit => (self.sink)(StrStep::Exit), - State::Normal => (), + State::PendingEnter | 
State::Normal => (), } self.eat_trivias(); self.do_token(kind, n_tokens as usize); From 2f3237912dd557493560f76a9fbbd7926b7933a5 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov <aleksey.kladov@gmail.com> Date: Wed, 29 Dec 2021 20:37:08 +0300 Subject: [PATCH 7/7] restore invariants --- crates/parser/src/shortcuts.rs | 6 ++++-- crates/parser/src/tests/entries.rs | 29 +++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index 7040608a9f7..b038d44fe08 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -50,6 +50,7 @@ impl<'a> LexedStr<'a> { res } + /// NB: only valid to call with Output from Reparser/TopLevelEntry. pub fn intersperse_trivia( &self, output: &crate::Output, @@ -76,7 +77,7 @@ impl<'a> LexedStr<'a> { builder.eat_trivias(); (builder.sink)(StrStep::Exit); } - State::PendingEnter | State::Normal => (), + State::PendingEnter | State::Normal => unreachable!(), } let is_eof = builder.pos == builder.lexed.len(); @@ -100,8 +101,9 @@ enum State { impl Builder<'_, '_> { fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { match mem::replace(&mut self.state, State::Normal) { + State::PendingEnter => unreachable!(), State::PendingExit => (self.sink)(StrStep::Exit), - State::PendingEnter | State::Normal => (), + State::Normal => (), } self.eat_trivias(); self.do_token(kind, n_tokens as usize); diff --git a/crates/parser/src/tests/entries.rs b/crates/parser/src/tests/entries.rs index 93e8136263e..947922d8b32 100644 --- a/crates/parser/src/tests/entries.rs +++ b/crates/parser/src/tests/entries.rs @@ -1,4 +1,4 @@ -use crate::{LexedStr, PrefixEntryPoint, StrStep}; +use crate::{LexedStr, PrefixEntryPoint, Step}; #[test] fn vis() { @@ -30,12 +30,25 @@ fn stmt() { fn check_prefix(entry: PrefixEntryPoint, input: &str, prefix: &str) { let lexed = LexedStr::new(input); let input = lexed.to_input(); - let output = entry.parse(&input); - let mut 
buf = String::new(); - lexed.intersperse_trivia(&output, &mut |step| match step { - StrStep::Token { kind: _, text } => buf.push_str(text), - _ => (), - }); - assert_eq!(buf.trim(), prefix) + let mut n_tokens = 0; + for step in entry.parse(&input).iter() { + match step { + Step::Token { n_input_tokens, .. } => n_tokens += n_input_tokens as usize, + Step::Enter { .. } | Step::Exit | Step::Error { .. } => (), + } + } + + let mut i = 0; + loop { + if n_tokens == 0 { + break; + } + if !lexed.kind(i).is_trivia() { + n_tokens -= 1; + } + i += 1; + } + let buf = &lexed.as_str()[..lexed.text_start(i)]; + assert_eq!(buf, prefix); }