Skip to content

Commit f439906

Browse files
David Rajchenbach-Teller authored and brson committed
str.rs: Added functions loop_chars, loop_chars_sub, char_len_range, byte_len_range.
1 parent b0278f5 commit f439906

File tree

1 file changed

+63
-8
lines changed

1 file changed

+63
-8
lines changed

src/lib/str.rs

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,16 @@ Module: str
44
String manipulation.
55
*/
66

7-
export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len, index,
7+
export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len,
8+
byte_len_range, index,
89
rindex, find, starts_with, ends_with, substr, slice, split, concat,
910
connect, to_upper, replace, char_slice, trim_left, trim_right, trim,
1011
unshift_char, shift_char, pop_char, push_char, is_utf8, from_chars,
11-
to_chars, char_len, char_at, bytes, is_ascii, shift_byte, pop_byte,
12+
to_chars, char_len, char_len_range, char_at, bytes, is_ascii,
13+
shift_byte, pop_byte,
1214
unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at,
1315
str_from_cstr, sbuf, as_buf, push_byte, utf8_char_width, safe_slice,
14-
contains, iter_chars;
16+
contains, iter_chars, loop_chars, loop_chars_sub;
1517

1618
native "c-stack-cdecl" mod rustrt {
1719
fn rust_str_push(&s: str, ch: u8);
@@ -136,6 +138,23 @@ fn byte_len(s: str) -> uint unsafe {
136138
ret vlen - 1u;
137139
}
138140

141+
/*
142+
Function byte_len_range
143+
144+
As byte_len but for a substring
145+
*/
146+
fn byte_len_range(s: str, byte_offset: uint, char_len: uint) -> uint {
147+
let i = byte_offset;
148+
let chars = 0u;
149+
while chars < char_len {
150+
let chsize = utf8_char_width(s[i]);
151+
assert (chsize > 0u);
152+
i += chsize;
153+
chars += 1u;
154+
}
155+
ret i - byte_offset;
156+
}
157+
139158
/*
140159
Function: bytes
141160
@@ -314,22 +333,57 @@ fn iter_chars(s: str, it: block(char)) {
314333
}
315334
}
316335

336+
/*
337+
Function: loop_chars
338+
339+
As `iter_chars` but may be interrupted
340+
*/
341+
fn loop_chars(s: str, it: block(char) -> bool) -> bool{
342+
ret loop_chars_sub(s, 0u, byte_len(s), it);
343+
}
344+
345+
/*
346+
Function: loop_chars_sub
347+
348+
As `loop_chars` but on a substring
349+
*/
350+
fn loop_chars_sub(s: str, byte_offset: uint, byte_len: uint,
351+
it: block(char) -> bool) -> bool {
352+
let i = byte_offset;
353+
let result = true;
354+
while i < byte_len {
355+
let {ch, next} = char_range_at(s, i);
356+
if !it(ch) {result = false; break;}
357+
i = next;
358+
}
359+
ret result;
360+
}
361+
362+
317363
/*
318364
Function: char_len
319365
320366
Count the number of unicode characters in a string
321367
*/
322368
fn char_len(s: str) -> uint {
323-
let i = 0u;
324-
let len = 0u;
325-
let total = byte_len(s);
326-
while i < total {
369+
ret char_len_range(s, 0u, byte_len(s));
370+
}
371+
372+
/*
373+
Function: char_len_range
374+
375+
As char_len but for a slice of a string
376+
*/
377+
fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint {
378+
let i = byte_start;
379+
let len = 0u;
380+
while i < byte_len {
327381
let chsize = utf8_char_width(s[i]);
328382
assert (chsize > 0u);
329383
len += 1u;
330384
i += chsize;
331385
}
332-
assert (i == total);
386+
assert (i == byte_len);
333387
ret len;
334388
}
335389

@@ -818,3 +872,4 @@ unsafe fn str_from_cstr(cstr: sbuf) -> str {
818872
}
819873
ret res;
820874
}
875+

0 commit comments

Comments
 (0)