85
85
// Misc
86
86
// FIXME: perhaps some more of this section shouldn't be exported?
87
87
is_utf8,
88
- char_len_range ,
89
- byte_len_range ,
88
+ substr_len_bytes ,
89
+ substr_len_chars ,
90
90
utf8_char_width,
91
91
char_range_at,
92
92
char_at,
@@ -271,7 +271,7 @@ Failure:
271
271
If the string does not contain any characters.
272
272
*/
273
273
fn pop_char ( & s: str ) -> char unsafe {
274
- let end = byte_len ( s) ;
274
+ let end = len_bytes ( s) ;
275
275
let { ch: ch , prev : end } = char_range_at_reverse ( s, end) ;
276
276
s = unsafe :: slice_bytes ( s, 0 u, end) ;
277
277
ret ch;
@@ -288,7 +288,7 @@ If the string does not contain any characters.
288
288
*/
289
289
fn shift_char ( & s: str ) -> char unsafe {
290
290
let r = char_range_at ( s, 0 u) ;
291
- s = unsafe :: slice_bytes ( s, r. next , byte_len ( s) ) ;
291
+ s = unsafe :: slice_bytes ( s, r. next , len_bytes ( s) ) ;
292
292
ret r. ch ;
293
293
}
294
294
@@ -367,7 +367,7 @@ Convert a string to a vector of characters
367
367
fn chars ( s : str ) -> [ char ] {
368
368
let buf: [ char ] = [ ] ;
369
369
let i = 0 u;
370
- let len = byte_len ( s) ;
370
+ let len = len_bytes ( s) ;
371
371
while i < len {
372
372
let cur = char_range_at ( s, i) ;
373
373
buf += [ cur. ch ] ;
@@ -443,7 +443,7 @@ fn splitn_byte(ss: str, sep: u8, count: uint) -> [str] unsafe {
443
443
assert u8:: is_ascii ( sep) ;
444
444
445
445
let vv = [ ] ;
446
- let start = 0 u, current = 0 u, len = byte_len ( ss) ;
446
+ let start = 0 u, current = 0 u, len = len_bytes ( ss) ;
447
447
let splits_done = 0 u;
448
448
449
449
while splits_done < count && current < len {
@@ -550,7 +550,7 @@ up to `count` times
550
550
fn splitn_char ( ss : str , sep : char , count : uint ) -> [ str ] unsafe {
551
551
552
552
let vv = [ ] ;
553
- let start = 0 u, current = 0 u, len = byte_len ( ss) ;
553
+ let start = 0 u, current = 0 u, len = len_bytes ( ss) ;
554
554
let splits_done = 0 u;
555
555
556
556
while splits_done < count && current < len {
@@ -656,10 +656,11 @@ Returns:
656
656
The original string with all occurrences of `from` replaced with `to`
657
657
*/
658
658
fn replace( s : str , from : str , to : str ) : is_not_empty ( from ) -> str unsafe {
659
- if byte_len ( s) == 0 u {
659
+ if len_bytes ( s) == 0 u {
660
660
ret "" ;
661
661
} else if starts_with ( s, from) {
662
- ret to + replace ( unsafe :: slice_bytes ( s, byte_len ( from) , byte_len ( s) ) ,
662
+ ret to + replace (
663
+ unsafe :: slice_bytes ( s, len_bytes ( from) , len_bytes ( s) ) ,
663
664
from, to) ;
664
665
} else {
665
666
let idx = find ( s, from) ;
@@ -726,7 +727,7 @@ Return true if a predicate matches all characters or
726
727
if the string contains no characters
727
728
*/
728
729
fn all ( s : str , it : fn ( char ) -> bool ) -> bool {
729
- ret substr_all ( s, 0 u, byte_len ( s) , it) ;
730
+ ret substr_all ( s, 0 u, len_bytes ( s) , it) ;
730
731
}
731
732
732
733
/*
@@ -746,7 +747,7 @@ Apply a function to each character
746
747
*/
747
748
fn map ( ss : str , ff : fn ( char ) -> char ) -> str {
748
749
let result = "" ;
749
- reserve ( result, byte_len ( ss) ) ;
750
+ reserve ( result, len_bytes ( ss) ) ;
750
751
751
752
chars_iter ( ss, { |cc|
752
753
str:: push_char ( result, ff ( cc) ) ;
@@ -762,7 +763,7 @@ Iterate over the bytes in a string
762
763
*/
763
764
fn bytes_iter ( ss : str , it : fn ( u8 ) ) {
764
765
let pos = 0 u;
765
- let len = byte_len ( ss) ;
766
+ let len = len_bytes ( ss) ;
766
767
767
768
while ( pos < len) {
768
769
it ( ss[ pos] ) ;
@@ -776,7 +777,7 @@ Function: chars_iter
776
777
Iterate over the characters in a string
777
778
*/
778
779
fn chars_iter ( s : str , it : fn ( char ) ) {
779
- let pos = 0 u, len = byte_len ( s) ;
780
+ let pos = 0 u, len = len_bytes ( s) ;
780
781
while ( pos < len) {
781
782
let { ch, next} = char_range_at ( s, pos) ;
782
783
pos = next;
@@ -833,7 +834,7 @@ Section: Searching
833
834
fn index ( ss : str , cc : char ) -> option < uint > {
834
835
let bii = 0 u;
835
836
let cii = 0 u;
836
- let len = byte_len ( ss) ;
837
+ let len = len_bytes ( ss) ;
837
838
while bii < len {
838
839
let { ch, next} = char_range_at ( ss, bii) ;
839
840
@@ -855,7 +856,7 @@ fn index(ss: str, cc: char) -> option<uint> {
855
856
// Returns the index of the first matching char
856
857
// (as option some/none)
857
858
fn rindex ( ss : str , cc : char ) -> option < uint > {
858
- let bii = byte_len ( ss) ;
859
+ let bii = len_bytes ( ss) ;
859
860
let cii = len ( ss) ;
860
861
while bii > 0 u {
861
862
let { ch, prev} = char_range_at_reverse ( ss, bii) ;
@@ -890,8 +891,8 @@ The index of the first occurance of `needle`, or -1 if not found.
890
891
FIXME: UTF-8
891
892
*/
892
893
fn find ( haystack : str , needle : str ) -> int {
893
- let haystack_len: int = byte_len ( haystack) as int ;
894
- let needle_len: int = byte_len ( needle) as int ;
894
+ let haystack_len: int = len_bytes ( haystack) as int ;
895
+ let needle_len: int = len_bytes ( needle) as int ;
895
896
if needle_len == 0 { ret 0 ; }
896
897
fn match_at ( haystack : str , needle : str , i : int ) -> bool {
897
898
let j: int = i;
@@ -931,8 +932,8 @@ haystack - The string to look in
931
932
needle - The string to look for
932
933
*/
933
934
fn starts_with ( haystack : str , needle : str ) -> bool {
934
- let haystack_len: uint = byte_len ( haystack) ;
935
- let needle_len: uint = byte_len ( needle) ;
935
+ let haystack_len: uint = len_bytes ( haystack) ;
936
+ let needle_len: uint = len_bytes ( needle) ;
936
937
if needle_len == 0 u { ret true ; }
937
938
if needle_len > haystack_len { ret false ; }
938
939
ret eq( substr ( haystack, 0 u, needle_len) , needle) ;
@@ -969,7 +970,7 @@ Function: is_ascii
969
970
Determines if a string contains only ASCII characters
970
971
*/
971
972
fn is_ascii ( s : str ) -> bool {
972
- let i: uint = byte_len ( s) ;
973
+ let i: uint = len_bytes ( s) ;
973
974
while i > 0 u { i -= 1 u; if !u8:: is_ascii ( s[ i] ) { ret false ; } }
974
975
ret true;
975
976
}
@@ -1019,7 +1020,7 @@ pure fn byte_len(s: str) -> uint unsafe { len_bytes(s) }
1019
1020
// String length or size in characters.
1020
1021
// (Synonyms: len_chars, char_len)
1021
1022
fn len ( s : str ) -> uint {
1022
- char_len_range ( s, 0 u, byte_len ( s) )
1023
+ substr_len_chars ( s, 0 u, len_bytes ( s) )
1023
1024
}
1024
1025
1025
1026
fn len_chars ( s : str ) -> uint { len ( s) }
@@ -1054,7 +1055,7 @@ fn is_utf8(v: [u8]) -> bool {
1054
1055
}
1055
1056
1056
1057
/*
1057
- Function: char_len_range
1058
+ Function: substr_len_chars
1058
1059
1059
1060
As char_len but for a slice of a string
1060
1061
@@ -1071,10 +1072,8 @@ Safety note:
1071
1072
- This function does not check whether the substring is valid.
1072
1073
- This function fails if `byte_start` or `byte_len` do not
1073
1074
represent valid positions inside `s`
1074
-
1075
- FIXME: rename to 'substr_len_chars'
1076
1075
*/
1077
- fn char_len_range ( s : str , byte_start : uint , byte_len : uint ) -> uint {
1076
+ fn substr_len_chars ( s : str , byte_start : uint , byte_len : uint ) -> uint {
1078
1077
let i = byte_start;
1079
1078
let byte_stop = i + byte_len;
1080
1079
let len = 0 u;
@@ -1088,7 +1087,7 @@ fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint {
1088
1087
}
1089
1088
1090
1089
/*
1091
- Function: byte_len_range
1090
+ Function: substr_len_bytes
1092
1091
1093
1092
As byte_len but for a substring
1094
1093
@@ -1108,7 +1107,7 @@ valid positions in `s`
1108
1107
1109
1108
Note: formerly named 'byte_len_range'.
1110
1109
*/
1111
- fn byte_len_range ( s : str , byte_offset : uint , char_len : uint ) -> uint {
1110
+ fn substr_len_bytes ( s : str , byte_offset : uint , char_len : uint ) -> uint {
1112
1111
let i = byte_offset;
1113
1112
let chars = 0 u;
1114
1113
while chars < char_len {
@@ -1149,7 +1148,7 @@ This function can be used to iterate over the unicode characters of a string.
1149
1148
Example:
1150
1149
> let s = "中华Việt Nam";
1151
1150
> let i = 0u;
1152
- > while i < str::byte_len (s) {
1151
+ > while i < str::len_bytes (s) {
1153
1152
> let {ch, next} = str::char_range_at(s, i);
1154
1153
> std::io::println(#fmt("%u: %c",i,ch));
1155
1154
> i = next;
@@ -1391,7 +1390,7 @@ mod unsafe {
1391
1390
unsafe fn slice_bytes ( s : str , begin : uint , end : uint ) -> str unsafe {
1392
1391
// FIXME: Typestate precondition
1393
1392
assert ( begin <= end) ;
1394
- assert ( end <= byte_len ( s) ) ;
1393
+ assert ( end <= len_bytes ( s) ) ;
1395
1394
1396
1395
let v = as_bytes ( s) { |v| vec:: slice ( v, begin, end) } ;
1397
1396
v += [ 0u8 ] ;
@@ -1408,7 +1407,7 @@ mod unsafe {
1408
1407
unsafe fn slice_bytes_safe_range ( s : str , begin : uint , end : uint )
1409
1408
: uint:: le( begin , end ) -> str {
1410
1409
// would need some magic to make this a precondition
1411
- assert ( end <= byte_len ( s) ) ;
1410
+ assert ( end <= len_bytes ( s) ) ;
1412
1411
ret slice_bytes ( s, begin, end) ;
1413
1412
}
1414
1413
@@ -1430,7 +1429,7 @@ mod unsafe {
1430
1429
//
1431
1430
// Removes the last byte from a string and returns it. (Not UTF-8 safe).
1432
1431
unsafe fn pop_byte ( & s: str ) -> u8 unsafe {
1433
- let len = byte_len ( s) ;
1432
+ let len = len_bytes ( s) ;
1434
1433
assert ( len > 0 u) ;
1435
1434
let b = s[ len - 1 u] ;
1436
1435
s = unsafe :: slice_bytes ( s, 0 u, len - 1 u) ;
@@ -1441,7 +1440,7 @@ mod unsafe {
1441
1440
//
1442
1441
// Removes the first byte from a string and returns it. (Not UTF-8 safe).
1443
1442
unsafe fn shift_byte ( & s: str ) -> u8 unsafe {
1444
- let len = byte_len ( s) ;
1443
+ let len = len_bytes ( s) ;
1445
1444
assert ( len > 0 u) ;
1446
1445
let b = s[ 0 ] ;
1447
1446
s = unsafe :: slice_bytes ( s, 1 u, len) ;
@@ -1470,14 +1469,23 @@ mod tests {
1470
1469
}
1471
1470
1472
1471
#[ test]
1473
- fn test_bytes_len ( ) {
1474
- assert ( byte_len ( "" ) == 0 u) ;
1475
- assert ( byte_len ( "hello world" ) == 11 u) ;
1476
- assert ( byte_len ( "\x63 " ) == 1 u) ;
1477
- assert ( byte_len ( "\xa2 " ) == 2 u) ;
1478
- assert ( byte_len ( "\u03c0 " ) == 2 u) ;
1479
- assert ( byte_len ( "\u2620 " ) == 3 u) ;
1480
- assert ( byte_len ( "\U 0001d11e" ) == 4 u) ;
1472
+ fn test_len ( ) {
1473
+ assert ( len_bytes ( "" ) == 0 u) ;
1474
+ assert ( len_bytes ( "hello world" ) == 11 u) ;
1475
+ assert ( len_bytes ( "\x63 " ) == 1 u) ;
1476
+ assert ( len_bytes ( "\xa2 " ) == 2 u) ;
1477
+ assert ( len_bytes ( "\u03c0 " ) == 2 u) ;
1478
+ assert ( len_bytes ( "\u2620 " ) == 3 u) ;
1479
+ assert ( len_bytes ( "\U 0001d11e" ) == 4 u) ;
1480
+
1481
+ assert ( len ( "" ) == 0 u) ;
1482
+ assert ( len ( "hello world" ) == 11 u) ;
1483
+ assert ( len ( "\x63 " ) == 1 u) ;
1484
+ assert ( len ( "\xa2 " ) == 1 u) ;
1485
+ assert ( len ( "\u03c0 " ) == 1 u) ;
1486
+ assert ( len ( "\u2620 " ) == 1 u) ;
1487
+ assert ( len ( "\U 0001d11e" ) == 1 u) ;
1488
+ assert ( len ( "ประเทศไทย中华Việt Nam" ) == 19 u) ;
1481
1489
}
1482
1490
1483
1491
#[ test]
@@ -1712,7 +1720,7 @@ mod tests {
1712
1720
#[ test]
1713
1721
fn test_substr ( ) {
1714
1722
fn t ( a : str , b : str , start : int ) {
1715
- assert ( eq ( substr ( a, start as uint , byte_len ( b) ) , b) ) ;
1723
+ assert ( eq ( substr ( a, start as uint , len_bytes ( b) ) , b) ) ;
1716
1724
}
1717
1725
t ( "hello" , "llo" , 2 ) ;
1718
1726
t ( "hello" , "el" , 1 ) ;
@@ -2002,7 +2010,7 @@ mod tests {
2002
2010
let v: [ u8 ] = bytes ( s1) ;
2003
2011
let s2: str = from_bytes ( v) ;
2004
2012
let i: uint = 0 u;
2005
- let n1: uint = byte_len ( s1) ;
2013
+ let n1: uint = len_bytes ( s1) ;
2006
2014
let n2: uint = vec:: len :: < u8 > ( v) ;
2007
2015
assert ( n1 == n2) ;
2008
2016
while i < n1 {
0 commit comments