Added str::char_offset_iter() and str::rev_char_offset_iter()
Renamed bytes_iter to byte_iter to match other iterators. Refactored str Iterators to use DoubleEnded Iterators and typedefs instead of wrapper structs. Reordered the Iterator section. Whitespace fixup. Moved clunky `each_split_within` function to the one place in the tree where it's actually needed. Replaced all block doccomments in str with line doccomments.
This commit is contained in:
parent
d75ab4a5d7
commit
e33fca9ffe
7 changed files with 592 additions and 597 deletions
|
@ -476,7 +476,6 @@ pub mod groups {
|
|||
use getopts::{HasArg, Long, Maybe, Multi, No, Occur, Opt, Optional, Req};
|
||||
use getopts::{Short, Yes};
|
||||
|
||||
use std::str;
|
||||
use std::vec;
|
||||
|
||||
/** one group of options, e.g., both -h and --help, along with
|
||||
|
@ -667,7 +666,7 @@ pub mod groups {
|
|||
|
||||
// FIXME: #5516
|
||||
let mut desc_rows = ~[];
|
||||
for str::each_split_within(desc_normalized_whitespace, 54) |substr| {
|
||||
for each_split_within(desc_normalized_whitespace, 54) |substr| {
|
||||
desc_rows.push(substr.to_owned());
|
||||
}
|
||||
|
||||
|
@ -683,6 +682,103 @@ pub mod groups {
|
|||
rows.collect::<~[~str]>().connect("\n") +
|
||||
"\n\n";
|
||||
}
|
||||
|
||||
/** Splits a string into substrings with possibly internal whitespace,
|
||||
* each of them at most `lim` bytes long. The substrings have leading and trailing
|
||||
* whitespace removed, and are only cut at whitespace boundaries.
|
||||
*
* Arguments:
*
* - `ss`: the string to split; every substring handed out is a slice of it.
* - `lim`: the maximum length of one substring. It is lowered to `ss.len()`
*   when it is larger than that.
* - `it`: callback invoked once per substring. Its return value is stored
*   and returned to the caller; `false` also ends the trailing run-out loop.
*
* Returns the last value returned by `it`, or `true` if `it` was never called.
*
|
||||
* Note: Function was moved here from `std::str` because this module is the only place that
|
||||
* uses it, and because it was too specific for a general string function.
|
||||
*
|
||||
* # Failure
|
||||
*
|
||||
* Fails during iteration if the string contains a non-whitespace
|
||||
* sequence longer than the limit.
|
||||
*/
|
||||
priv fn each_split_within<'a>(ss: &'a str,
|
||||
lim: uint,
|
||||
it: &fn(&'a str) -> bool) -> bool {
|
||||
// Just for fun, let's write this as a state machine:
|
||||
|
||||
enum SplitWithinState {
|
||||
A, // leading whitespace, initial state
|
||||
B, // words
|
||||
C, // internal and trailing whitespace
|
||||
}
|
||||
enum Whitespace {
|
||||
Ws, // current char is whitespace
|
||||
Cr // current char is not whitespace
|
||||
}
|
||||
enum LengthLimit {
|
||||
UnderLim, // current char makes current substring still fit in limit
|
||||
OverLim // current char makes current substring no longer fit in limit
|
||||
}
|
||||
|
||||
// slice_start: index where the substring currently being built begins.
// last_start:  index where the most recent word began.
// last_end:    index just past the most recent complete word; the emitted
//              substring is ss.slice(slice_start, last_end).
let mut slice_start = 0;
|
||||
let mut last_start = 0;
|
||||
let mut last_end = 0;
|
||||
let mut state = A;
|
||||
// fake_i starts one past the last real index and is advanced by the
// run-out loop at the bottom to feed synthetic trailing whitespace.
let mut fake_i = ss.len();
|
||||
let mut lim = lim;
|
||||
|
||||
// cont records the callback's latest answer; it is also this function's result.
let mut cont = true;
|
||||
// Emits the current substring to the callback and remembers its answer.
let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) };
|
||||
|
||||
// if the limit is larger than the string, lower it to save cycles
// NOTE(review): the clamp also appears to be what bounds the run-out loop
// below, since that loop only reaches state A through an OverLim transition.
|
||||
if (lim >= fake_i) {
|
||||
lim = fake_i;
|
||||
}
|
||||
|
||||
// One step of the automaton: classifies the char at index `i`, performs the
// transition (emitting a substring where needed) and returns `cont`.
let machine: &fn((uint, char)) -> bool = |(i, c)| {
|
||||
let whitespace = if ::std::char::is_whitespace(c) { Ws } else { Cr };
|
||||
let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };
|
||||
|
||||
state = match (state, whitespace, limit) {
|
||||
(A, Ws, _) => { A }
|
||||
(A, Cr, _) => { slice_start = i; last_start = i; B }
|
||||
|
||||
(B, Cr, UnderLim) => { B }
|
||||
// The current word alone already exceeds the limit: it cannot be placed.
(B, Cr, OverLim) if (i - last_start + 1) > lim
|
||||
=> fail!("word starting with %? longer than limit!",
|
||||
ss.slice(last_start, i + 1)),
|
||||
// Emit everything before the current word and restart the substring at it.
(B, Cr, OverLim) => { slice(); slice_start = last_start; B }
|
||||
(B, Ws, UnderLim) => { last_end = i; C }
|
||||
(B, Ws, OverLim) => { last_end = i; slice(); A }
|
||||
|
||||
(C, Cr, UnderLim) => { last_start = i; B }
|
||||
(C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
|
||||
(C, Ws, OverLim) => { slice(); A }
|
||||
(C, Ws, UnderLim) => { C }
|
||||
};
|
||||
|
||||
cont
|
||||
};
|
||||
|
||||
// NOTE(review): enumerate() numbers the chars, while slice() above appears to
// take byte offsets; the two agree only for single-byte chars -- confirm
// behaviour on multi-byte input.
ss.iter().enumerate().advance(|x| machine(x));
|
||||
|
||||
// Let the automaton 'run out' by supplying trailing whitespace
|
||||
while cont && match state { B | C => true, A => false } {
|
||||
machine((fake_i, ' '));
|
||||
fake_i += 1;
|
||||
}
|
||||
return cont;
|
||||
}
|
||||
|
||||
#[test]
priv fn test_split_within() {
    // Runs `each_split_within` over `s` with limit `i`, collects every
    // substring it yields, and checks that the result is exactly `u`.
    fn t(s: &str, i: uint, u: &[~str]) {
        let mut v = ~[];
        for each_split_within(s, i) |s| { v.push(s.to_owned()) }
        // `zip` stops at the shorter of the two sequences, so the element-wise
        // check alone would pass when substrings are missing or surplus.
        // Compare the lengths first.
        assert_eq!(v.len(), u.len());
        assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
    }
    t("", 0, []);
    t("", 15, []);
    t("hello", 15, [~"hello"]);
    t("\nMary had a little lamb\nLittle lamb\n", 15,
        [~"Mary had a", ~"little lamb", ~"Little lamb"]);
    t("\nMary had a little lamb\nLittle lamb\n", ::std::uint::max_value,
        [~"Mary had a little lamb\nLittle lamb"]);
}
|
||||
} // end groups module
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -260,7 +260,7 @@ impl Tm {
|
|||
priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
|
||||
fn match_str(s: &str, pos: uint, needle: &str) -> bool {
|
||||
let mut i = pos;
|
||||
for needle.bytes_iter().advance |ch| {
|
||||
for needle.byte_iter().advance |ch| {
|
||||
if s[i] != ch {
|
||||
return false;
|
||||
}
|
||||
|
|
1079
src/libstd/str.rs
1079
src/libstd/str.rs
File diff suppressed because it is too large
Load diff
|
@ -115,7 +115,7 @@ impl<'self> AsciiCast<&'self[Ascii]> for &'self str {
|
|||
|
||||
#[inline]
|
||||
fn is_ascii(&self) -> bool {
|
||||
self.bytes_iter().all(|b| b.is_ascii())
|
||||
self.byte_iter().all(|b| b.is_ascii())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ pub fn expand_syntax_ext(cx: @ExtCtxt, sp: span, tts: &[ast::token_tree]) -> bas
|
|||
ast::expr_lit(lit) => match lit.node {
|
||||
// string literal, push each byte to vector expression
|
||||
ast::lit_str(s) => {
|
||||
for s.bytes_iter().advance |byte| {
|
||||
for s.byte_iter().advance |byte| {
|
||||
bytes.push(cx.expr_u8(sp, byte));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ pub fn main() {
|
|||
assert_eq!(y, 6);
|
||||
let s = ~"hello there";
|
||||
let mut i: int = 0;
|
||||
for s.bytes_iter().advance |c| {
|
||||
for s.byte_iter().advance |c| {
|
||||
if i == 0 { assert!((c == 'h' as u8)); }
|
||||
if i == 1 { assert!((c == 'e' as u8)); }
|
||||
if i == 2 { assert!((c == 'l' as u8)); }
|
||||
|
|
|
@ -41,7 +41,7 @@ pub fn main() {
|
|||
|
||||
fn check_str_eq(a: ~str, b: ~str) {
|
||||
let mut i: int = 0;
|
||||
for a.bytes_iter().advance |ab| {
|
||||
for a.byte_iter().advance |ab| {
|
||||
info!(i);
|
||||
info!(ab);
|
||||
let bb: u8 = b[i];
|
||||
|
|
Loading…
Add table
Reference in a new issue