Fix #3961: use char range methods instead of byte offsets to detect whitespace.
This commit is contained in:
parent
541c657a73
commit
1deebeef7d
2 changed files with 132 additions and 14 deletions
|
@@ -198,26 +198,35 @@ fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool,
|
|||
}
|
||||
}
|
||||
|
||||
// FIXME #3961: This is not the right way to convert string byte
|
||||
// offsets to characters.
|
||||
fn all_whitespace(s: &str, begin: uint, end: uint) -> bool {
|
||||
let mut i: uint = begin;
|
||||
while i != end {
|
||||
if !is_whitespace(s[i] as char) { return false; } i += 1u;
|
||||
// Returns None if the first col chars of s contain a non-whitespace char.
|
||||
// Otherwise returns Some(k) where k is the byte offset just past that leading
|
||||
// whitespace. Note k may be outside bounds of s.
|
||||
fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
|
||||
let len = s.len();
|
||||
let mut col = col.to_uint();
|
||||
let mut cursor: uint = 0;
|
||||
while col > 0 && cursor < len {
|
||||
let r: str::CharRange = str::char_range_at(s, cursor);
|
||||
if !r.ch.is_whitespace() {
|
||||
return None;
|
||||
}
|
||||
cursor = r.next;
|
||||
col -= 1;
|
||||
}
|
||||
return true;
|
||||
return Some(cursor);
|
||||
}
|
||||
|
||||
fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
|
||||
s: ~str, col: CharPos) {
|
||||
let len = s.len();
|
||||
// FIXME #3961: Doing bytewise comparison and slicing with CharPos
|
||||
let col = col.to_uint();
|
||||
let s1 = if all_whitespace(s, 0, uint::min(len, col)) {
|
||||
if col < len {
|
||||
s.slice(col, len).to_owned()
|
||||
} else { ~"" }
|
||||
} else { s };
|
||||
let s1 = match all_whitespace(s, col) {
|
||||
Some(col) => {
|
||||
if col < len {
|
||||
s.slice(col, len).to_owned()
|
||||
} else { ~"" }
|
||||
}
|
||||
None => s,
|
||||
};
|
||||
debug!("pushing line: %s", s1);
|
||||
lines.push(s1);
|
||||
}
|
||||
|
|
109
src/test/pretty/block-comment-wchar.rs
Normal file
109
src/test/pretty/block-comment-wchar.rs
Normal file
|
@@ -0,0 +1,109 @@
|
|||
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// This is meant as a test case for Issue 3961.
|
||||
//
|
||||
// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
|
||||
|
||||
fn f() {
|
||||
fn nested() {
|
||||
/*
|
||||
Spaced2
|
||||
*/
|
||||
/*
|
||||
Spaced10
|
||||
*/
|
||||
/*
|
||||
Tabbed8+2
|
||||
*/
|
||||
/*
|
||||
CR8+2
|
||||
*/
|
||||
}
|
||||
/*
|
||||
Spaced2: (prefixed so start of space aligns with comment)
|
||||
*/
|
||||
/*
|
||||
Tabbed2: (more indented b/c *start* of space will align with comment)
|
||||
*/
|
||||
/*
|
||||
Spaced6: (Alignment removed and realigning spaces inserted)
|
||||
*/
|
||||
/*
|
||||
Tabbed4+2: (Alignment removed and realigning spaces inserted)
|
||||
*/
|
||||
|
||||
/*
|
||||
VT4+2: (should align)
|
||||
*/
|
||||
/*
|
||||
FF4+2: (should align)
|
||||
*/
|
||||
/*
|
||||
CR4+2: (should align)
|
||||
*/
|
||||
/*
|
||||
// (NEL deliberately omitted)
|
||||
*/
|
||||
/*
|
||||
Ogham Space Mark 4+2: (should align)
|
||||
*/
|
||||
/*
|
||||
Mongolian Vowel Separator 4+2: (should align)
|
||||
*/
|
||||
/*
|
||||
Four-per-em space 4+2: (should align)
|
||||
*/
|
||||
|
||||
/*
|
||||
Mongolian Vowel Sep count 1: (should align)
|
||||
Mongolian Vowel Sep count 2: (should align)
|
||||
Mongolian Vowel Sep count 3: (should align)
|
||||
Mongolian Vowel Sep count 4: (should align)
|
||||
Mongolian Vowel Sep count 5: (should align)
|
||||
Mongolian Vowel Sep count 6: (should align)
|
||||
Mongolian Vowel Sep count 7: (should align)
|
||||
Mongolian Vowel Sep count 8: (should align)
|
||||
Mongolian Vowel Sep count 9: (should align)
|
||||
Mongolian Vowel Sep count A: (should align)
|
||||
Mongolian Vowel Sep count B: (should align)
|
||||
Mongolian Vowel Sep count C: (should align)
|
||||
Mongolian Vowel Sep count D: (should align)
|
||||
Mongolian Vowel Sep count E: (should align)
|
||||
Mongolian Vowel Sep count F: (should align)
|
||||
*/
|
||||
|
||||
/* */ /*
|
||||
Hello from offset 6
|
||||
Space 6+2: compare A
|
||||
Mongolian Vowel Separator 6+2: compare B
|
||||
*/
|
||||
/**/ /*
|
||||
Hello from another offset 6 with wchars establishing column offset
|
||||
Space 6+2: compare C
|
||||
Mongolian Vowel Separator 6+2: compare D
|
||||
*/
|
||||
}
|
||||
|
||||
// Prints every character in the list below together with the result of
// calling `is_whitespace()` on it, one character per output line.
fn main() {
|
||||
// List of whitespace characters taken from http://en.wikipedia.org/wiki/Whitespace_character
|
||||
let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20',
|
||||
// '\x85', // NEL is left out: for some reason Rust thinks it isn't whitespace
|
||||
'\xA0', '\u1680', '\u180E',
|
||||
'\u2000', '\u2001', '\u2002', '\u2003',
|
||||
'\u2004', '\u2005', '\u2006', '\u2007',
|
||||
'\u2008', '\u2009', '\u200A',
|
||||
'\u2028', '\u2029', '\u202F', '\u205F',
|
||||
'\u3000'
|
||||
];
|
||||
for vec::each(chars) |c| {
|
||||
io::println(fmt!("%? %?", c, c.is_whitespace()));
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue