Skip to content

Commit 2ba44e2

Browse files
committed
(core::str) rename byte_len_range -> substr_len_bytes and
rename char_len_range -> substr_len_chars
1 parent 2b4f513 commit 2ba44e2

File tree

3 files changed

+56
-48
lines changed

3 files changed

+56
-48
lines changed

src/comp/syntax/codemap.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ fn lookup_byte_offset(cm: codemap::codemap, chpos: uint)
135135
let {fm,line} = lookup_line(cm,chpos,lookup);
136136
let line_offset = fm.lines[line].byte - fm.start_pos.byte;
137137
let col = chpos - fm.lines[line].ch;
138-
let col_offset = str::byte_len_range(*fm.src, line_offset, col);
138+
let col_offset = str::substr_len_bytes(*fm.src, line_offset, col);
139139
ret {fm: fm, pos: line_offset + col_offset};
140140
}
141141

src/libcore/str.rs

Lines changed: 50 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ export
8585
// Misc
8686
// FIXME: perhaps some more of this section shouldn't be exported?
8787
is_utf8,
88-
char_len_range,
89-
byte_len_range,
88+
substr_len_bytes,
89+
substr_len_chars,
9090
utf8_char_width,
9191
char_range_at,
9292
char_at,
@@ -271,7 +271,7 @@ Failure:
271271
If the string does not contain any characters.
272272
*/
273273
fn pop_char(&s: str) -> char unsafe {
274-
let end = byte_len(s);
274+
let end = len_bytes(s);
275275
let {ch:ch, prev:end} = char_range_at_reverse(s, end);
276276
s = unsafe::slice_bytes(s, 0u, end);
277277
ret ch;
@@ -288,7 +288,7 @@ If the string does not contain any characters.
288288
*/
289289
fn shift_char(&s: str) -> char unsafe {
290290
let r = char_range_at(s, 0u);
291-
s = unsafe::slice_bytes(s, r.next, byte_len(s));
291+
s = unsafe::slice_bytes(s, r.next, len_bytes(s));
292292
ret r.ch;
293293
}
294294

@@ -367,7 +367,7 @@ Convert a string to a vector of characters
367367
fn chars(s: str) -> [char] {
368368
let buf: [char] = [];
369369
let i = 0u;
370-
let len = byte_len(s);
370+
let len = len_bytes(s);
371371
while i < len {
372372
let cur = char_range_at(s, i);
373373
buf += [cur.ch];
@@ -443,7 +443,7 @@ fn splitn_byte(ss: str, sep: u8, count: uint) -> [str] unsafe {
443443
assert u8::is_ascii(sep);
444444

445445
let vv = [];
446-
let start = 0u, current = 0u, len = byte_len(ss);
446+
let start = 0u, current = 0u, len = len_bytes(ss);
447447
let splits_done = 0u;
448448

449449
while splits_done < count && current < len {
@@ -550,7 +550,7 @@ up to `count` times
550550
fn splitn_char(ss: str, sep: char, count: uint) -> [str] unsafe {
551551

552552
let vv = [];
553-
let start = 0u, current = 0u, len = byte_len(ss);
553+
let start = 0u, current = 0u, len = len_bytes(ss);
554554
let splits_done = 0u;
555555

556556
while splits_done < count && current < len {
@@ -656,10 +656,11 @@ Returns:
656656
The original string with all occurrences of `from` replaced with `to`
657657
*/
658658
fn replace(s: str, from: str, to: str) : is_not_empty(from) -> str unsafe {
659-
if byte_len(s) == 0u {
659+
if len_bytes(s) == 0u {
660660
ret "";
661661
} else if starts_with(s, from) {
662-
ret to + replace(unsafe::slice_bytes(s, byte_len(from), byte_len(s)),
662+
ret to + replace(
663+
unsafe::slice_bytes(s, len_bytes(from), len_bytes(s)),
663664
from, to);
664665
} else {
665666
let idx = find(s, from);
@@ -726,7 +727,7 @@ Return true if a predicate matches all characters or
726727
if the string contains no characters
727728
*/
728729
fn all(s: str, it: fn(char) -> bool) -> bool{
729-
ret substr_all(s, 0u, byte_len(s), it);
730+
ret substr_all(s, 0u, len_bytes(s), it);
730731
}
731732

732733
/*
@@ -746,7 +747,7 @@ Apply a function to each character
746747
*/
747748
fn map(ss: str, ff: fn(char) -> char) -> str {
748749
let result = "";
749-
reserve(result, byte_len(ss));
750+
reserve(result, len_bytes(ss));
750751

751752
chars_iter(ss, {|cc|
752753
str::push_char(result, ff(cc));
@@ -762,7 +763,7 @@ Iterate over the bytes in a string
762763
*/
763764
fn bytes_iter(ss: str, it: fn(u8)) {
764765
let pos = 0u;
765-
let len = byte_len(ss);
766+
let len = len_bytes(ss);
766767

767768
while (pos < len) {
768769
it(ss[pos]);
@@ -776,7 +777,7 @@ Function: chars_iter
776777
Iterate over the characters in a string
777778
*/
778779
fn chars_iter(s: str, it: fn(char)) {
779-
let pos = 0u, len = byte_len(s);
780+
let pos = 0u, len = len_bytes(s);
780781
while (pos < len) {
781782
let {ch, next} = char_range_at(s, pos);
782783
pos = next;
@@ -833,7 +834,7 @@ Section: Searching
833834
fn index(ss: str, cc: char) -> option<uint> {
834835
let bii = 0u;
835836
let cii = 0u;
836-
let len = byte_len(ss);
837+
let len = len_bytes(ss);
837838
while bii < len {
838839
let {ch, next} = char_range_at(ss, bii);
839840

@@ -855,7 +856,7 @@ fn index(ss: str, cc: char) -> option<uint> {
855856
// Returns the index of the last matching char
856857
// (as option some/none)
857858
fn rindex(ss: str, cc: char) -> option<uint> {
858-
let bii = byte_len(ss);
859+
let bii = len_bytes(ss);
859860
let cii = len(ss);
860861
while bii > 0u {
861862
let {ch, prev} = char_range_at_reverse(ss, bii);
@@ -890,8 +891,8 @@ The index of the first occurrence of `needle`, or -1 if not found.
890891
FIXME: UTF-8
891892
*/
892893
fn find(haystack: str, needle: str) -> int {
893-
let haystack_len: int = byte_len(haystack) as int;
894-
let needle_len: int = byte_len(needle) as int;
894+
let haystack_len: int = len_bytes(haystack) as int;
895+
let needle_len: int = len_bytes(needle) as int;
895896
if needle_len == 0 { ret 0; }
896897
fn match_at(haystack: str, needle: str, i: int) -> bool {
897898
let j: int = i;
@@ -931,8 +932,8 @@ haystack - The string to look in
931932
needle - The string to look for
932933
*/
933934
fn starts_with(haystack: str, needle: str) -> bool {
934-
let haystack_len: uint = byte_len(haystack);
935-
let needle_len: uint = byte_len(needle);
935+
let haystack_len: uint = len_bytes(haystack);
936+
let needle_len: uint = len_bytes(needle);
936937
if needle_len == 0u { ret true; }
937938
if needle_len > haystack_len { ret false; }
938939
ret eq(substr(haystack, 0u, needle_len), needle);
@@ -969,7 +970,7 @@ Function: is_ascii
969970
Determines if a string contains only ASCII characters
970971
*/
971972
fn is_ascii(s: str) -> bool {
972-
let i: uint = byte_len(s);
973+
let i: uint = len_bytes(s);
973974
while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { ret false; } }
974975
ret true;
975976
}
@@ -1019,7 +1020,7 @@ pure fn byte_len(s: str) -> uint unsafe { len_bytes(s) }
10191020
// String length or size in characters.
10201021
// (Synonyms: len_chars, char_len)
10211022
fn len(s: str) -> uint {
1022-
char_len_range(s, 0u, byte_len(s))
1023+
substr_len_chars(s, 0u, len_bytes(s))
10231024
}
10241025

10251026
fn len_chars(s: str) -> uint { len(s) }
@@ -1054,7 +1055,7 @@ fn is_utf8(v: [u8]) -> bool {
10541055
}
10551056

10561057
/*
1057-
Function: char_len_range
1058+
Function: substr_len_chars
10581059
10591060
As char_len but for a slice of a string
10601061
@@ -1071,10 +1072,8 @@ Safety note:
10711072
- This function does not check whether the substring is valid.
10721073
- This function fails if `byte_start` or `byte_len` do not
10731074
represent valid positions inside `s`
1074-
1075-
FIXME: rename to 'substr_len_chars'
10761075
*/
1077-
fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint {
1076+
fn substr_len_chars(s: str, byte_start: uint, byte_len: uint) -> uint {
10781077
let i = byte_start;
10791078
let byte_stop = i + byte_len;
10801079
let len = 0u;
@@ -1088,7 +1087,7 @@ fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint {
10881087
}
10891088

10901089
/*
1091-
Function: byte_len_range
1090+
Function: substr_len_bytes
10921091
10931092
As byte_len but for a substring
10941093
@@ -1108,7 +1107,7 @@ valid positions in `s`
11081107
11091108
(Formerly named 'byte_len_range'.)
11101109
*/
1111-
fn byte_len_range(s: str, byte_offset: uint, char_len: uint) -> uint {
1110+
fn substr_len_bytes(s: str, byte_offset: uint, char_len: uint) -> uint {
11121111
let i = byte_offset;
11131112
let chars = 0u;
11141113
while chars < char_len {
@@ -1149,7 +1148,7 @@ This function can be used to iterate over the unicode characters of a string.
11491148
Example:
11501149
> let s = "中华Việt Nam";
11511150
> let i = 0u;
1152-
> while i < str::byte_len(s) {
1151+
> while i < str::len_bytes(s) {
11531152
> let {ch, next} = str::char_range_at(s, i);
11541153
> std::io::println(#fmt("%u: %c",i,ch));
11551154
> i = next;
@@ -1391,7 +1390,7 @@ mod unsafe {
13911390
unsafe fn slice_bytes(s: str, begin: uint, end: uint) -> str unsafe {
13921391
// FIXME: Typestate precondition
13931392
assert (begin <= end);
1394-
assert (end <= byte_len(s));
1393+
assert (end <= len_bytes(s));
13951394

13961395
let v = as_bytes(s) { |v| vec::slice(v, begin, end) };
13971396
v += [0u8];
@@ -1408,7 +1407,7 @@ mod unsafe {
14081407
unsafe fn slice_bytes_safe_range(s: str, begin: uint, end: uint)
14091408
: uint::le(begin, end) -> str {
14101409
// would need some magic to make this a precondition
1411-
assert (end <= byte_len(s));
1410+
assert (end <= len_bytes(s));
14121411
ret slice_bytes(s, begin, end);
14131412
}
14141413

@@ -1430,7 +1429,7 @@ mod unsafe {
14301429
//
14311430
// Removes the last byte from a string and returns it. (Not UTF-8 safe).
14321431
unsafe fn pop_byte(&s: str) -> u8 unsafe {
1433-
let len = byte_len(s);
1432+
let len = len_bytes(s);
14341433
assert (len > 0u);
14351434
let b = s[len - 1u];
14361435
s = unsafe::slice_bytes(s, 0u, len - 1u);
@@ -1441,7 +1440,7 @@ mod unsafe {
14411440
//
14421441
// Removes the first byte from a string and returns it. (Not UTF-8 safe).
14431442
unsafe fn shift_byte(&s: str) -> u8 unsafe {
1444-
let len = byte_len(s);
1443+
let len = len_bytes(s);
14451444
assert (len > 0u);
14461445
let b = s[0];
14471446
s = unsafe::slice_bytes(s, 1u, len);
@@ -1470,14 +1469,23 @@ mod tests {
14701469
}
14711470

14721471
#[test]
1473-
fn test_bytes_len() {
1474-
assert (byte_len("") == 0u);
1475-
assert (byte_len("hello world") == 11u);
1476-
assert (byte_len("\x63") == 1u);
1477-
assert (byte_len("\xa2") == 2u);
1478-
assert (byte_len("\u03c0") == 2u);
1479-
assert (byte_len("\u2620") == 3u);
1480-
assert (byte_len("\U0001d11e") == 4u);
1472+
fn test_len() {
1473+
assert (len_bytes("") == 0u);
1474+
assert (len_bytes("hello world") == 11u);
1475+
assert (len_bytes("\x63") == 1u);
1476+
assert (len_bytes("\xa2") == 2u);
1477+
assert (len_bytes("\u03c0") == 2u);
1478+
assert (len_bytes("\u2620") == 3u);
1479+
assert (len_bytes("\U0001d11e") == 4u);
1480+
1481+
assert (len("") == 0u);
1482+
assert (len("hello world") == 11u);
1483+
assert (len("\x63") == 1u);
1484+
assert (len("\xa2") == 1u);
1485+
assert (len("\u03c0") == 1u);
1486+
assert (len("\u2620") == 1u);
1487+
assert (len("\U0001d11e") == 1u);
1488+
assert (len("ประเทศไทย中华Việt Nam") == 19u);
14811489
}
14821490

14831491
#[test]
@@ -1712,7 +1720,7 @@ mod tests {
17121720
#[test]
17131721
fn test_substr() {
17141722
fn t(a: str, b: str, start: int) {
1715-
assert (eq(substr(a, start as uint, byte_len(b)), b));
1723+
assert (eq(substr(a, start as uint, len_bytes(b)), b));
17161724
}
17171725
t("hello", "llo", 2);
17181726
t("hello", "el", 1);
@@ -2002,7 +2010,7 @@ mod tests {
20022010
let v: [u8] = bytes(s1);
20032011
let s2: str = from_bytes(v);
20042012
let i: uint = 0u;
2005-
let n1: uint = byte_len(s1);
2013+
let n1: uint = len_bytes(s1);
20062014
let n2: uint = vec::len::<u8>(v);
20072015
assert (n1 == n2);
20082016
while i < n1 {

src/libstd/rope.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ mod node {
743743
*/
744744
fn of_substr(str: @str, byte_start: uint, byte_len: uint) -> @node {
745745
ret of_substr_unsafer(str, byte_start, byte_len,
746-
str::char_len_range(*str, byte_start, byte_len));
746+
str::substr_len_chars(*str, byte_start, byte_len));
747747
}
748748

749749
/*
@@ -795,7 +795,7 @@ mod node {
795795
if i == 0u { first_leaf_char_len }
796796
else { hint_max_leaf_char_len };
797797
let chunk_byte_len =
798-
str::byte_len_range(*str, offset, chunk_char_len);
798+
str::substr_len_bytes(*str, offset, chunk_char_len);
799799
nodes[i] = @leaf({
800800
byte_offset: offset,
801801
byte_len: chunk_byte_len,
@@ -998,7 +998,7 @@ mod node {
998998
alt(*node) {
999999
node::leaf(x) {
10001000
let char_len =
1001-
str::char_len_range(*x.content, byte_offset, byte_len);
1001+
str::substr_len_chars(*x.content, byte_offset, byte_len);
10021002
ret @leaf({byte_offset: byte_offset,
10031003
byte_len: byte_len,
10041004
char_len: char_len,
@@ -1059,9 +1059,9 @@ mod node {
10591059
ret node;
10601060
}
10611061
let byte_offset =
1062-
str::byte_len_range(*x.content, 0u, char_offset);
1062+
str::substr_len_bytes(*x.content, 0u, char_offset);
10631063
let byte_len =
1064-
str::byte_len_range(*x.content, byte_offset, char_len);
1064+
str::substr_len_bytes(*x.content, byte_offset, char_len);
10651065
ret @leaf({byte_offset: byte_offset,
10661066
byte_len: byte_len,
10671067
char_len: char_len,

0 commit comments

Comments
 (0)