From 1deebeef7d18612565029da7d5adb5d71f280d85 Mon Sep 17 00:00:00 2001 From: "Felix S. Klock II" Date: Mon, 27 May 2013 12:08:37 +0200 Subject: [PATCH] Fix #3961 : use char range methods instead of byte offsets to detect whitespace. --- src/libsyntax/parse/comments.rs | 37 +++++---- src/test/pretty/block-comment-wchar.rs | 109 +++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 14 deletions(-) create mode 100644 src/test/pretty/block-comment-wchar.rs diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index 5c56ea6c446..29fac8f951d 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -198,26 +198,35 @@ fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool, } } -// FIXME #3961: This is not the right way to convert string byte -// offsets to characters. -fn all_whitespace(s: &str, begin: uint, end: uint) -> bool { - let mut i: uint = begin; - while i != end { - if !is_whitespace(s[i] as char) { return false; } i += 1u; +// Returns None if the first col chars of s contain a non-whitespace char. +// Otherwise returns Some(k) where k is first char offset after that leading +// whitespace. Note k may be outside bounds of s. 
+fn all_whitespace(s: &str, col: CharPos) -> Option<uint> { + let len = s.len(); + let mut col = col.to_uint(); + let mut cursor: uint = 0; + while col > 0 && cursor < len { + let r: str::CharRange = str::char_range_at(s, cursor); + if !r.ch.is_whitespace() { + return None; + } + cursor = r.next; + col -= 1; } - return true; + return Some(cursor); } fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str], s: ~str, col: CharPos) { let len = s.len(); - // FIXME #3961: Doing bytewise comparison and slicing with CharPos - let col = col.to_uint(); - let s1 = if all_whitespace(s, 0, uint::min(len, col)) { - if col < len { - s.slice(col, len).to_owned() - } else { ~"" } - } else { s }; + let s1 = match all_whitespace(s, col) { + Some(col) => { + if col < len { + s.slice(col, len).to_owned() + } else { ~"" } + } + None => s, + }; debug!("pushing line: %s", s1); lines.push(s1); } diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs new file mode 100644 index 00000000000..cd97e4174f3 --- /dev/null +++ b/src/test/pretty/block-comment-wchar.rs @@ -0,0 +1,109 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This is meant as a test case for Issue 3961. 
+// +// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs + +fn f() { + fn nested() { + /* + Spaced2 + */ + /* + Spaced10 + */ + /* + Tabbed8+2 + */ + /* + CR8+2 + */ + } + /* + Spaced2: (prefixed so start of space aligns with comment) + */ + /* + Tabbed2: (more indented b/c *start* of space will align with comment) + */ + /* + Spaced6: (Alignment removed and realigning spaces inserted) + */ + /* + Tabbed4+2: (Alignment removed and realigning spaces inserted) + */ + + /* + VT4+2: (should align) + */ + /* + FF4+2: (should align) + */ + /* + CR4+2: (should align) + */ + /* + // (NEL deliberately omitted) + */ + /* +     Ogham Space Mark 4+2: (should align) + */ + /* +᠎᠎᠎᠎ Mongolian Vowel Separator 4+2: (should align) + */ + /* +     Four-per-em space 4+2: (should align) + */ + + /* + ᠎ Mongolian Vowel Sep count 1: (should align) + ᠎ Mongolian Vowel Sep count 2: (should align) + ᠎᠎ Mongolian Vowel Sep count 3: (should align) + ᠎ Mongolian Vowel Sep count 4: (should align) + ᠎ ᠎ Mongolian Vowel Sep count 5: (should align) + ᠎᠎ Mongolian Vowel Sep count 6: (should align) + ᠎᠎᠎ Mongolian Vowel Sep count 7: (should align) +᠎ Mongolian Vowel Sep count 8: (should align) +᠎ ᠎ Mongolian Vowel Sep count 9: (should align) +᠎ ᠎ Mongolian Vowel Sep count A: (should align) +᠎ ᠎᠎ Mongolian Vowel Sep count B: (should align) +᠎᠎ Mongolian Vowel Sep count C: (should align) +᠎᠎ ᠎ Mongolian Vowel Sep count D: (should align) +᠎᠎᠎ Mongolian Vowel Sep count E: (should align) +᠎᠎᠎᠎ Mongolian Vowel Sep count F: (should align) + */ + +/* */ /* + Hello from offset 6 + Space 6+2: compare A +᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare B + */ +/*᠎*/ /* + Hello from another offset 6 with wchars establishing column offset + Space 6+2: compare C +᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare D + */ +} + +fn main() { + // Taken from http://en.wikipedia.org/wiki/Whitespace_character + let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20', + // '\x85', // for some reason Rust 
thinks NEL isn't whitespace + '\xA0', '\u1680', '\u180E', + '\u2000', '\u2001', '\u2002', '\u2003', + '\u2004', '\u2005', '\u2006', '\u2007', + '\u2008', '\u2009', '\u200A', + '\u2028', '\u2029', '\u202F', '\u205F', + '\u3000' + ]; + for vec::each(chars) |c| { + io::println(fmt!("%? %?", c, c.is_whitespace())); + } +}