@@ -4,14 +4,16 @@ Module: str
4
4
String manipulation.
5
5
*/
6
6
7
- export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len, index,
7
+ export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len,
8
+ byte_len_range, index,
8
9
rindex, find, starts_with, ends_with, substr, slice, split, concat,
9
10
connect, to_upper, replace, char_slice, trim_left, trim_right, trim,
10
11
unshift_char, shift_char, pop_char, push_char, is_utf8, from_chars,
11
- to_chars, char_len, char_at, bytes, is_ascii, shift_byte, pop_byte,
12
+ to_chars, char_len, char_len_range, char_at, bytes, is_ascii,
13
+ shift_byte, pop_byte,
12
14
unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at,
13
15
str_from_cstr, sbuf, as_buf, push_byte, utf8_char_width, safe_slice,
14
- contains, iter_chars;
16
+ contains, iter_chars, loop_chars , loop_chars_sub ;
15
17
16
18
native "c-stack-cdecl" mod rustrt {
17
19
fn rust_str_push ( & s: str , ch : u8 ) ;
@@ -136,6 +138,23 @@ fn byte_len(s: str) -> uint unsafe {
136
138
ret vlen - 1 u;
137
139
}
138
140
141
+ /*
142
+ Function byte_len_range
143
+
144
+ As byte_len but for a substring
145
+ */
146
+ fn byte_len_range ( s : str , byte_offset : uint , char_len : uint ) -> uint {
147
+ let i = byte_offset;
148
+ let chars = 0 u;
149
+ while chars < char_len {
150
+ let chsize = utf8_char_width ( s[ i] ) ;
151
+ assert ( chsize > 0 u) ;
152
+ i += chsize;
153
+ chars += 1 u;
154
+ }
155
+ ret i - byte_offset;
156
+ }
157
+
139
158
/*
140
159
Function: bytes
141
160
@@ -314,22 +333,57 @@ fn iter_chars(s: str, it: block(char)) {
314
333
}
315
334
}
316
335
336
+ /*
337
+ Function: loop_chars
338
+
339
+ As `iter_chars` but may be interrupted
340
+ */
341
+ fn loop_chars ( s : str , it : block ( char ) -> bool ) -> bool {
342
+ ret loop_chars_sub ( s, 0 u, byte_len ( s) , it) ;
343
+ }
344
+
345
+ /*
346
+ Function: loop_chars_sub
347
+
348
+ As `loop_chars` but on a substring
349
+ */
350
+ fn loop_chars_sub ( s : str , byte_offset : uint , byte_len : uint ,
351
+ it : block ( char ) -> bool ) -> bool {
352
+ let i = byte_offset;
353
+ let result = true ;
354
+ while i < byte_len {
355
+ let { ch, next} = char_range_at ( s, i) ;
356
+ if !it ( ch) { result = false ; break ; }
357
+ i = next;
358
+ }
359
+ ret result;
360
+ }
361
+
362
+
317
363
/*
318
364
Function: char_len
319
365
320
366
Count the number of unicode characters in a string
321
367
*/
322
368
fn char_len ( s : str ) -> uint {
323
- let i = 0 u;
324
- let len = 0 u;
325
- let total = byte_len ( s) ;
326
- while i < total {
369
+ ret char_len_range ( s, 0 u, byte_len ( s) ) ;
370
+ }
371
+
372
+ /*
373
+ Function: char_len_range
374
+
375
+ As char_len but for a slice of a string
376
+ */
377
+ fn char_len_range ( s : str , byte_start : uint , byte_len : uint ) -> uint {
378
+ let i = byte_start;
379
+ let len = 0 u;
380
+ while i < byte_len {
327
381
let chsize = utf8_char_width ( s[ i] ) ;
328
382
assert ( chsize > 0 u) ;
329
383
len += 1 u;
330
384
i += chsize;
331
385
}
332
- assert ( i == total ) ;
386
+ assert ( i == byte_len ) ;
333
387
ret len;
334
388
}
335
389
@@ -818,3 +872,4 @@ unsafe fn str_from_cstr(cstr: sbuf) -> str {
818
872
}
819
873
ret res;
820
874
}
875
+
0 commit comments