From e8447a82104d5f9da2802af1e8530aba7ad6512a Mon Sep 17 00:00:00 2001 From: Marcus Klaas Date: Mon, 19 Oct 2015 21:41:47 +0200 Subject: [PATCH] Fix large block comments --- Cargo.lock | 4 +- src/comment.rs | 119 ++++++++++++++++++++++++++----------- src/expr.rs | 8 +-- src/items.rs | 7 ++- src/missed_spans.rs | 124 ++++++++++++++++++++++++++++++++++----- src/visitor.rs | 17 +++++- tests/source/comment.rs | 39 ++++++++++++ tests/target/closure.rs | 5 +- tests/target/comment.rs | 42 +++++++++++++ tests/target/doc.rs | 2 +- tests/target/expr.rs | 3 +- tests/target/match.rs | 9 ++- tests/target/multiple.rs | 3 +- 13 files changed, 314 insertions(+), 68 deletions(-) create mode 100644 tests/source/comment.rs create mode 100644 tests/target/comment.rs diff --git a/Cargo.lock b/Cargo.lock index 3648e5e5f67..c00a673ebf6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7,7 +7,7 @@ dependencies = [ "rustc-serialize 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)", "strings 0.0.1 (git+https://github.com/nrc/strings.rs.git)", "term 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)", - "toml 0.1.22 (registry+https://github.com/rust-lang/crates.io-index)", + "toml 0.1.23 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -93,7 +93,7 @@ dependencies = [ [[package]] name = "toml" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "rustc-serialize 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/src/comment.rs b/src/comment.rs index e0910ba093e..e4ce5a7ed35 100644 --- a/src/comment.rs +++ b/src/comment.rs @@ -81,11 +81,11 @@ pub fn rewrite_comment(orig: &str, let rewrite = try_opt!(rewrite_string(line, &fmt)); result.push_str(&rewrite); } else { - if line.len() == 0 { - result.pop(); // Remove space if this is an empty comment. - } else { - result.push_str(line); + if line.len() == 0 || line.starts_with('!') { + // Remove space if this is an empty comment or a doc comment. + result.pop(); } + result.push_str(line); } first = false; @@ -162,7 +162,7 @@ pub fn contains_comment(text: &str) -> bool { CharClasses::new(text.chars()).any(|(kind, _)| kind == CodeCharKind::Comment) } -pub struct CharClasses +struct CharClasses where T: Iterator, T::Item: RichChar { @@ -170,7 +170,7 @@ pub struct CharClasses status: CharClassesStatus, } -pub trait RichChar { +trait RichChar { fn get_char(&self) -> char; } @@ -211,7 +211,7 @@ pub enum CodeCharKind { } impl CharClasses where T: Iterator, T::Item: RichChar { - pub fn new(base: T) -> CharClasses { + fn new(base: T) -> CharClasses { CharClasses { base: base.peekable(), status: CharClassesStatus::Normal, @@ -291,10 +291,6 @@ impl Iterator for CharClasses where T: Iterator, T::Item: RichChar { '\n' => CharClassesStatus::Normal, _ => CharClassesStatus::LineComment, }; - // let code_char_kind = match chr { - // '\n' => CodeCharKind::Normal, - // _ => CodeCharKind::Comment, - // }; return Some((CodeCharKind::Comment, item)); } }; @@ -302,9 +298,12 @@ impl Iterator for CharClasses where T: Iterator, T::Item: RichChar { } } +/// Iterator over an alternating sequence of functional and commented parts of +/// a string. The first item is always a, possibly zero length, subslice of +/// functional text. Line style comments contain their ending newlines. pub struct CommentCodeSlices<'a> { slice: &'a str, - last_slice_type: CodeCharKind, + last_slice_kind: CodeCharKind, last_slice_end: usize, } @@ -312,7 +311,7 @@ impl<'a> CommentCodeSlices<'a> { pub fn new(slice: &'a str) -> CommentCodeSlices<'a> { CommentCodeSlices { slice: slice, - last_slice_type: CodeCharKind::Comment, + last_slice_kind: CodeCharKind::Comment, last_slice_end: 0, } } @@ -327,32 +326,52 @@ impl<'a> Iterator for CommentCodeSlices<'a> { } let mut sub_slice_end = self.last_slice_end; - for (kind, (i, _)) in CharClasses::new(self.slice[self.last_slice_end..].char_indices()) { - if kind == self.last_slice_type { - sub_slice_end = self.last_slice_end + i; + let mut first_whitespace = None; + let subslice = &self.slice[self.last_slice_end..]; + let mut iter = CharClasses::new(subslice.char_indices()); + + for (kind, (i, c)) in &mut iter { + let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal && + &subslice[..2] == "//" && + [' ', '\t'].contains(&c); + + if is_comment_connector && first_whitespace.is_none() { + first_whitespace = Some(i); + } + + if kind == self.last_slice_kind && !is_comment_connector { + let last_index = match first_whitespace { + Some(j) => j, + None => i, + }; + sub_slice_end = self.last_slice_end + last_index; break; } + + if !is_comment_connector { + first_whitespace = None; + } } - let kind = match self.last_slice_type { + if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) { + // This was the last subslice. + sub_slice_end = match first_whitespace { + Some(i) => self.last_slice_end + i, + None => self.slice.len(), + }; + } + + let kind = match self.last_slice_kind { CodeCharKind::Comment => CodeCharKind::Normal, CodeCharKind::Normal => CodeCharKind::Comment, }; - self.last_slice_type = kind; + let res = (kind, + self.last_slice_end, + &self.slice[self.last_slice_end..sub_slice_end]); + self.last_slice_end = sub_slice_end; + self.last_slice_kind = kind; - // FIXME: be consistent in use of kind vs type. - if sub_slice_end == self.last_slice_end { - // This was the last subslice. - self.last_slice_end = self.slice.len(); - - Some((kind, sub_slice_end, &self.slice[sub_slice_end..])) - } else { - let res = (kind, - self.last_slice_end, - &self.slice[self.last_slice_end..sub_slice_end]); - self.last_slice_end = sub_slice_end; - Some(res) - } + Some(res) } } @@ -362,10 +381,20 @@ mod test { CommentCodeSlices}; use Indent; + #[test] + fn char_classes() { + let mut iter = CharClasses::new("//\n\n".chars()); + + assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap()); + assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap()); + assert_eq!((CodeCharKind::Comment, '\n'), iter.next().unwrap()); + assert_eq!((CodeCharKind::Normal, '\n'), iter.next().unwrap()); + assert_eq!(None, iter.next()); + } + #[test] fn comment_code_slices() { let input = "code(); /* test */ 1 + 1"; - let mut iter = CommentCodeSlices::new(input); assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap()); @@ -375,6 +404,31 @@ mod test { assert_eq!(None, iter.next()); } + #[test] + fn comment_code_slices_two() { + let input = "// comment\n test();"; + let mut iter = CommentCodeSlices::new(input); + + assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap()); + assert_eq!((CodeCharKind::Comment, 0, "// comment\n"), + iter.next().unwrap()); + assert_eq!((CodeCharKind::Normal, 11, " test();"), + iter.next().unwrap()); + assert_eq!(None, iter.next()); + } + + #[test] + fn comment_code_slices_three() { + let input = "1 // comment\n // comment2\n\n"; + let mut iter = CommentCodeSlices::new(input); + + assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap()); + assert_eq!((CodeCharKind::Comment, 2, "// comment\n // comment2\n"), + iter.next().unwrap()); + assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap()); + assert_eq!(None, iter.next()); + } + #[test] #[rustfmt_skip] fn format_comments() { @@ -434,7 +488,6 @@ mod test { #[test] fn test_find_uncommented() { fn check(haystack: &str, needle: &str, expected: Option) { - println!("haystack {:?}, needle: {:?}", haystack, needle); assert_eq!(expected, haystack.find_uncommented(needle)); } diff --git a/src/expr.rs b/src/expr.rs index 8592ad47b9e..f0a877db215 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -471,10 +471,6 @@ impl Rewrite for ast::Block { visitor.visit_block(self); - // Push text between last block item and end of block - let snippet = visitor.snippet(mk_sp(visitor.last_pos, self.span.hi)); - visitor.buffer.push_str(&snippet); - Some(format!("{}{}", prefix, visitor.buffer)) } } @@ -1534,8 +1530,8 @@ pub fn rewrite_assign_rhs>(context: &RewriteContext, let new_offset = offset.block_indent(context.config); result.push_str(&format!("\n{}", new_offset.to_string(context.config))); - // FIXME: we probably should related max_width to width instead of config.max_width - // where is the 1 coming from anyway? + // FIXME: we probably should related max_width to width instead of + // config.max_width where is the 1 coming from anyway? let max_width = try_opt!(context.config.max_width.checked_sub(new_offset.width() + 1)); let inner_context = context.nested_context(); let rhs = ex.rewrite(&inner_context, max_width, new_offset); diff --git a/src/items.rs b/src/items.rs index 78d35427519..c8054b8d01f 100644 --- a/src/items.rs +++ b/src/items.rs @@ -287,8 +287,8 @@ impl<'a> FmtVisitor<'a> { has_body: bool) -> Option<(String, bool)> { let mut force_new_line_for_brace = false; - // FIXME we'll lose any comments in between parts of the function decl, but anyone - // who comments there probably deserves what they get. + // FIXME we'll lose any comments in between parts of the function decl, but + // anyone who comments there probably deserves what they get. let where_clause = &generics.where_clause; @@ -1008,7 +1008,8 @@ impl<'a> FmtVisitor<'a> { span_start, span_end); let item_vec = items.collect::>(); - // FIXME: we don't need to collect here if the where_layout isnt horizontalVertical + // FIXME: we don't need to collect here if the where_layout isnt + // HorizontalVertical. let tactic = definitive_tactic(&item_vec, self.config.where_layout, budget); let fmt = ListFormatting { diff --git a/src/missed_spans.rs b/src/missed_spans.rs index 53819bfdd8f..04d3ac3366a 100644 --- a/src/missed_spans.rs +++ b/src/missed_spans.rs @@ -10,7 +10,7 @@ use visitor::FmtVisitor; -use syntax::codemap::{self, BytePos, Span}; +use syntax::codemap::{self, BytePos, Span, Pos}; use comment::{CodeCharKind, CommentCodeSlices, rewrite_comment}; impl<'a> FmtVisitor<'a> { @@ -55,22 +55,116 @@ impl<'a> FmtVisitor<'a> { self.last_pos = end; let span = codemap::mk_sp(start, end); - self.write_snippet(&snippet, &process_last_snippet); + self.write_snippet(span, &process_last_snippet); } - fn write_snippet(&mut self, - snippet: &str, - process_last_snippet: F) { - let mut lines: Vec<&str> = snippet.lines().collect(); - let last_snippet = if snippet.ends_with("\n") { - "" - } else { - lines.pop().unwrap() - }; - for line in lines.iter() { - self.buffer.push_str(line.trim_right()); - self.buffer.push_str("\n"); + fn write_snippet(&mut self, span: Span, process_last_snippet: F) + where F: Fn(&mut FmtVisitor, &str, &str) + { + // Get a snippet from the file start to the span's hi without allocating. + // We need it to determine what precedes the current comment. If the comment + // follows code on the same line, we won't touch it. + let big_span_lo = self.codemap.lookup_char_pos(span.lo).file.start_pos; + let local_begin = self.codemap.lookup_byte_offset(big_span_lo); + let local_end = self.codemap.lookup_byte_offset(span.hi); + let start_index = local_begin.pos.to_usize(); + let end_index = local_end.pos.to_usize(); + let big_snippet = &local_begin.fm.src.as_ref().unwrap()[start_index..end_index]; + + let big_diff = (span.lo - big_span_lo).to_usize(); + let snippet = self.snippet(span); + + self.write_snippet_inner(big_snippet, big_diff, &snippet, process_last_snippet); + } + + fn write_snippet_inner(&mut self, + big_snippet: &str, + big_diff: usize, + snippet: &str, + process_last_snippet: F) + where F: Fn(&mut FmtVisitor, &str, &str) + { + // Trim whitespace from the right hand side of each line. + // Annoyingly, the library functions for splitting by lines etc. are not + // quite right, so we must do it ourselves. + let mut line_start = 0; + let mut last_wspace = None; + let mut rewrite_next_comment = true; + + for (kind, offset, subslice) in CommentCodeSlices::new(snippet) { + if let CodeCharKind::Comment = kind { + let last_char = big_snippet[..(offset + big_diff)] + .chars() + .rev() + .skip_while(|rev_c| [' ', '\t'].contains(&rev_c)) + .next(); + + let fix_indent = last_char.map(|rev_c| ['{', '\n'].contains(&rev_c)) + .unwrap_or(true); + + if rewrite_next_comment && fix_indent { + if let Some('{') = last_char { + self.buffer.push_str("\n"); + } + + let comment_width = ::std::cmp::min(self.config.ideal_width, + self.config.max_width - + self.block_indent.width()); + + self.buffer.push_str(&self.block_indent.to_string(self.config)); + self.buffer.push_str(&rewrite_comment(subslice, + false, + comment_width, + self.block_indent, + self.config) + .unwrap()); + + last_wspace = None; + line_start = offset + subslice.len(); + + if let Some('/') = subslice.chars().skip(1).next() { + self.buffer.push_str("\n"); + } else if line_start < snippet.len() { + let x = (&snippet[line_start..]).chars().next().unwrap() != '\n'; + + if x { + self.buffer.push_str("\n"); + } + } + + continue; + } else { + rewrite_next_comment = false; + } + } + + for (mut i, c) in subslice.char_indices() { + i += offset; + + if c == '\n' { + if let Some(lw) = last_wspace { + self.buffer.push_str(&snippet[line_start..lw]); + self.buffer.push_str("\n"); + } else { + self.buffer.push_str(&snippet[line_start..i + 1]); + } + + line_start = i + 1; + last_wspace = None; + rewrite_next_comment = rewrite_next_comment || kind == CodeCharKind::Normal; + } else { + if c.is_whitespace() { + if last_wspace.is_none() { + last_wspace = Some(i); + } + } else { + rewrite_next_comment = rewrite_next_comment || kind == CodeCharKind::Normal; + last_wspace = None; + } + } + } } - process_last_snippet(self, &last_snippet, snippet); + + process_last_snippet(self, &snippet[line_start..], &snippet); } } diff --git a/src/visitor.rs b/src/visitor.rs index 17f197ec880..b8de0fc7189 100644 --- a/src/visitor.rs +++ b/src/visitor.rs @@ -101,11 +101,22 @@ impl<'a, 'v> visit::Visitor<'v> for FmtVisitor<'a> { } } - self.block_indent = self.block_indent.block_unindent(self.config); // TODO: we should compress any newlines here to just one self.format_missing_with_indent(b.span.hi - brace_compensation); + // FIXME: this is a terrible hack to indent the comments between the last + // item in the block and the closing brace to the block's level. + // The closing brace itself, however, should be indented at a shallower + // level. + let total_len = self.buffer.len; + let chars_too_many = if self.config.hard_tabs { + 1 + } else { + self.config.tab_spaces + }; + self.buffer.truncate(total_len - chars_too_many); self.buffer.push_str("}"); self.last_pos = b.span.hi; + self.block_indent = self.block_indent.block_unindent(self.config); } // Note that this only gets called for function definitions. Required methods @@ -177,6 +188,7 @@ impl<'a, 'v> visit::Visitor<'v> for FmtVisitor<'a> { // FIXME(#78): format traits and impl definitions. ast::Item_::ItemImpl(..) | ast::Item_::ItemTrait(..) => { + self.format_missing_with_indent(item.span.lo); self.block_indent = self.block_indent.block_indent(self.config); visit::walk_item(self, item); self.block_indent = self.block_indent.block_unindent(self.config); @@ -215,6 +227,9 @@ impl<'a, 'v> visit::Visitor<'v> for FmtVisitor<'a> { } ast::Item_::ItemMac(..) => { self.format_missing_with_indent(item.span.lo); + let snippet = self.snippet(item.span); + self.buffer.push_str(&snippet); + self.last_pos = item.span.hi; // FIXME: we cannot format these yet, because of a bad span. // See rust lang issue #28424. // visit::walk_item(self, item); diff --git a/tests/source/comment.rs b/tests/source/comment.rs new file mode 100644 index 00000000000..34b5de79b75 --- /dev/null +++ b/tests/source/comment.rs @@ -0,0 +1,39 @@ +//! Doc comment +fn test() { +// comment + // comment2 + + code(); /* leave this comment alone! + * ok? */ + + /* Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a + * diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam + * viverra nec consectetur ante hendrerit. Donec et mollis dolor. + * Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam + * tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut + * libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit + * amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis + * felis, pulvinar a semper sed, adipiscing id dolor. */ + + // Very looooooooooooooooooooooooooooooooooooooooooooooooooooooooong comment that should be split + + // println!("{:?}", rewrite_comment(subslice, + // false, + // comment_width, + // self.block_indent, + // self.config) + // .unwrap()); + + funk(); //dontchangeme + // or me +} + + /// test123 +fn doc_comment() { +} + +fn chains() { + foo.bar(|| { + let x = 10; + /* comment */ x }) +} diff --git a/tests/target/closure.rs b/tests/target/closure.rs index 72acae8dbd9..6ab217ed6c4 100644 --- a/tests/target/closure.rs +++ b/tests/target/closure.rs @@ -19,7 +19,7 @@ fn main() { }; let loooooooooooooong_name = |field| { - // TODO(#27): format comments. + // TODO(#27): format comments. if field.node.attrs.len() > 0 { field.node.attrs[0].span.lo } else { @@ -39,7 +39,8 @@ fn main() { let empty = |arg| {}; - let simple = |arg| { /* TODO(#27): comment formatting */ + let simple = |arg| { + // TODO(#27): comment formatting foo(arg) }; diff --git a/tests/target/comment.rs b/tests/target/comment.rs new file mode 100644 index 00000000000..575631cd3be --- /dev/null +++ b/tests/target/comment.rs @@ -0,0 +1,42 @@ +//! Doc comment +fn test() { + // comment + // comment2 + + code(); /* leave this comment alone! + * ok? */ + + // Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a + // diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam + // viverra nec consectetur ante hendrerit. Donec et mollis dolor. + // Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam + // tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut + // libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit + // amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis + // felis, pulvinar a semper sed, adipiscing id dolor. + + // Very looooooooooooooooooooooooooooooooooooooooooooooooooooooooong comment + // that should be split + + // println!("{:?}", rewrite_comment(subslice, + // false, + // comment_width, + // self.block_indent, + // self.config) + // .unwrap()); + + funk(); //dontchangeme + // or me +} + +/// test123 +fn doc_comment() { +} + +fn chains() { + foo.bar(|| { + let x = 10; + // comment + x + }) +} diff --git a/tests/target/doc.rs b/tests/target/doc.rs index f325337c758..9e883c50afa 100644 --- a/tests/target/doc.rs +++ b/tests/target/doc.rs @@ -1,3 +1,3 @@ // sadfsdfa -//sdffsdfasdf +// sdffsdfasdf diff --git a/tests/target/expr.rs b/tests/target/expr.rs index 479a279e43b..4b4afaa99e5 100644 --- a/tests/target/expr.rs +++ b/tests/target/expr.rs @@ -142,7 +142,8 @@ fn baz() { fn qux() { {} // FIXME this one could be done better. - { /* a block with a comment */ + { + // a block with a comment } { diff --git a/tests/target/match.rs b/tests/target/match.rs index 6c498014aa1..7202b0e0c80 100644 --- a/tests/target/match.rs +++ b/tests/target/match.rs @@ -6,7 +6,8 @@ fn foo() { // Some comment. a => foo(), b if 0 < 42 => foo(), - c => { // Another comment. + c => { + // Another comment. // Comment. an_expression; foo() @@ -112,7 +113,8 @@ fn issue339() { // collapsing here exceeds line length ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffg => { } - h => { // comment above block + h => { + // comment above block } i => {} // comment below block j => { @@ -133,7 +135,8 @@ fn issue339() { m => {} n => {} o => {} - p => { // Dont collapse me + p => { + // Dont collapse me } q => {} r => {} diff --git a/tests/target/multiple.rs b/tests/target/multiple.rs index 3c17eb77924..7d6513f70b7 100644 --- a/tests/target/multiple.rs +++ b/tests/target/multiple.rs @@ -22,7 +22,8 @@ mod doc; mod other; -// sfdgfffffffffffffffffffffffffffffffffffffffffffffffffffffff ffffffffffffffffffffffffffffffffffffffffff +// sfdgfffffffffffffffffffffffffffffffffffffffffffffffffffffff +// ffffffffffffffffffffffffffffffffffffffffff fn foo(a: isize, b: u32 /* blah blah */, c: f64) {