Added str::char_offset_iter() and str::rev_char_offset_iter()

Renamed bytes_iter to byte_iter to match other iterators
Refactored str Iterators to use DoubleEnded Iterators and typedefs instead of wrapper structs
Reordered the Iterator section
Whitespace fixup
Moved clunky `each_split_within` function to the one place in the tree where it's actually needed
Replaced all block doccomments in str with line doccomments
This commit is contained in:
Marvin Löbel 2013-07-27 23:38:38 +02:00
parent d75ab4a5d7
commit e33fca9ffe
7 changed files with 592 additions and 597 deletions

View file

@ -476,7 +476,6 @@ pub mod groups {
use getopts::{HasArg, Long, Maybe, Multi, No, Occur, Opt, Optional, Req};
use getopts::{Short, Yes};
use std::str;
use std::vec;
/** one group of options, e.g., both -h and --help, along with
@ -667,7 +666,7 @@ pub mod groups {
// FIXME: #5516
let mut desc_rows = ~[];
for str::each_split_within(desc_normalized_whitespace, 54) |substr| {
for each_split_within(desc_normalized_whitespace, 54) |substr| {
desc_rows.push(substr.to_owned());
}
@ -683,6 +682,103 @@ pub mod groups {
rows.collect::<~[~str]>().connect("\n") +
"\n\n";
}
/** Splits a string into substrings with possibly internal whitespace,
* each of them at most `lim` bytes long. The substrings have leading and trailing
* whitespace removed, and are only cut at whitespace boundaries.
*
* Note: Function was moved here from `std::str` because this module is the only place that
* uses it, and because it was to specific for a general string function.
*
* #Failure:
*
* Fails during iteration if the string contains a non-whitespace
* sequence longer than the limit.
*/
priv fn each_split_within<'a>(ss: &'a str,
lim: uint,
it: &fn(&'a str) -> bool) -> bool {
// Just for fun, let's write this as an state machine:
enum SplitWithinState {
A, // leading whitespace, initial state
B, // words
C, // internal and trailing whitespace
}
enum Whitespace {
Ws, // current char is whitespace
Cr // current char is not whitespace
}
enum LengthLimit {
UnderLim, // current char makes current substring still fit in limit
OverLim // current char makes current substring no longer fit in limit
}
let mut slice_start = 0;
let mut last_start = 0;
let mut last_end = 0;
let mut state = A;
let mut fake_i = ss.len();
let mut lim = lim;
let mut cont = true;
let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) };
// if the limit is larger than the string, lower it to save cycles
if (lim >= fake_i) {
lim = fake_i;
}
let machine: &fn((uint, char)) -> bool = |(i, c)| {
let whitespace = if ::std::char::is_whitespace(c) { Ws } else { Cr };
let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };
state = match (state, whitespace, limit) {
(A, Ws, _) => { A }
(A, Cr, _) => { slice_start = i; last_start = i; B }
(B, Cr, UnderLim) => { B }
(B, Cr, OverLim) if (i - last_start + 1) > lim
=> fail!("word starting with %? longer than limit!",
ss.slice(last_start, i + 1)),
(B, Cr, OverLim) => { slice(); slice_start = last_start; B }
(B, Ws, UnderLim) => { last_end = i; C }
(B, Ws, OverLim) => { last_end = i; slice(); A }
(C, Cr, UnderLim) => { last_start = i; B }
(C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
(C, Ws, OverLim) => { slice(); A }
(C, Ws, UnderLim) => { C }
};
cont
};
ss.iter().enumerate().advance(|x| machine(x));
// Let the automaton 'run out' by supplying trailing whitespace
while cont && match state { B | C => true, A => false } {
machine((fake_i, ' '));
fake_i += 1;
}
return cont;
}
#[test]
priv fn test_split_within() {
fn t(s: &str, i: uint, u: &[~str]) {
let mut v = ~[];
for each_split_within(s, i) |s| { v.push(s.to_owned()) }
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
t("", 0, []);
t("", 15, []);
t("hello", 15, [~"hello"]);
t("\nMary had a little lamb\nLittle lamb\n", 15,
[~"Mary had a", ~"little lamb", ~"Little lamb"]);
t("\nMary had a little lamb\nLittle lamb\n", ::std::uint::max_value,
[~"Mary had a little lamb\nLittle lamb"]);
}
} // end groups module
#[cfg(test)]

View file

@ -260,7 +260,7 @@ impl Tm {
priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
fn match_str(s: &str, pos: uint, needle: &str) -> bool {
let mut i = pos;
for needle.bytes_iter().advance |ch| {
for needle.byte_iter().advance |ch| {
if s[i] != ch {
return false;
}

File diff suppressed because it is too large Load diff

View file

@ -115,7 +115,7 @@ impl<'self> AsciiCast<&'self[Ascii]> for &'self str {
#[inline]
fn is_ascii(&self) -> bool {
self.bytes_iter().all(|b| b.is_ascii())
self.byte_iter().all(|b| b.is_ascii())
}
}

View file

@ -27,7 +27,7 @@ pub fn expand_syntax_ext(cx: @ExtCtxt, sp: span, tts: &[ast::token_tree]) -> bas
ast::expr_lit(lit) => match lit.node {
// string literal, push each byte to vector expression
ast::lit_str(s) => {
for s.bytes_iter().advance |byte| {
for s.byte_iter().advance |byte| {
bytes.push(cx.expr_u8(sp, byte));
}
}

View file

@ -16,7 +16,7 @@ pub fn main() {
assert_eq!(y, 6);
let s = ~"hello there";
let mut i: int = 0;
for s.bytes_iter().advance |c| {
for s.byte_iter().advance |c| {
if i == 0 { assert!((c == 'h' as u8)); }
if i == 1 { assert!((c == 'e' as u8)); }
if i == 2 { assert!((c == 'l' as u8)); }

View file

@ -41,7 +41,7 @@ pub fn main() {
fn check_str_eq(a: ~str, b: ~str) {
let mut i: int = 0;
for a.bytes_iter().advance |ab| {
for a.byte_iter().advance |ab| {
info!(i);
info!(ab);
let bb: u8 = b[i];