Skip to content

Commit 3ac00a9

Browse files
committed
std: remove substr & str::count_*, methodise char_len, implement slice_chars.
The confusing mixture of byte index and character count meant that every use of `.substr` was incorrect; it is replaced by `slice_chars`, which uses only character indices. The old behaviour of `.substr(start, n)` can be emulated via `.slice_from(start).slice_chars(0, n)`.
1 parent 9fff8c6 commit 3ac00a9

File tree

10 files changed

+76
-104
lines changed

10 files changed

+76
-104
lines changed

doc/tutorial.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1410,7 +1410,7 @@ let new_favorite_crayon_name = favorite_crayon_name.trim();
14101410
14111411
if favorite_crayon_name.len() > 5 {
14121412
// Create a substring
1413-
println(favorite_crayon_name.substr(0, 5));
1413+
println(favorite_crayon_name.slice_chars(0, 5));
14141414
}
14151415
~~~
14161416

src/libextra/rope.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,9 @@ pub fn of_str(str: @~str) -> Rope {
8484
*
8585
* # Return value
8686
*
87-
* A rope representing the same string as `str.substr(byte_offset,
88-
* byte_len)`. Depending on `byte_len`, this rope may be empty, flat
89-
* or complex.
87+
* A rope representing the same string as `str.slice(byte_offset,
88+
* byte_offset + byte_len)`. Depending on `byte_len`, this rope may
89+
* be empty, flat or complex.
9090
*
9191
* # Performance note
9292
*
@@ -588,7 +588,7 @@ pub mod node {
588588
* * char_len - The number of chars in the leaf.
589589
* * content - Contents of the leaf.
590590
*
591-
* Note that we can have `char_len < str::char_len(content)`, if
591+
* Note that we can have `char_len < content.char_len()`, if
592592
* this leaf is only a subset of the string. Also note that the
593593
* string can be shared between several ropes, e.g. for indexing
594594
* purposes.
@@ -680,7 +680,7 @@ pub mod node {
680680
*/
681681
pub fn of_substr(str: @~str, byte_start: uint, byte_len: uint) -> @Node {
682682
return of_substr_unsafer(str, byte_start, byte_len,
683-
str::count_chars(*str, byte_start, byte_len));
683+
str.slice(byte_start, byte_start + byte_len).char_len());
684684
}
685685

686686
/**
@@ -734,7 +734,7 @@ pub mod node {
734734
if i == 0u { first_leaf_char_len }
735735
else { hint_max_leaf_char_len };
736736
let chunk_byte_len =
737-
str::count_bytes(*str, offset, chunk_char_len);
737+
str.slice_from(offset).slice_chars(0, chunk_char_len).len();
738738
nodes[i] = @Leaf(Leaf {
739739
byte_offset: offset,
740740
byte_len: chunk_byte_len,
@@ -938,7 +938,7 @@ pub mod node {
938938
match (*node) {
939939
node::Leaf(x) => {
940940
let char_len =
941-
str::count_chars(*x.content, byte_offset, byte_len);
941+
x.content.slice(byte_offset, byte_offset + byte_len).char_len();
942942
return @Leaf(Leaf {
943943
byte_offset: byte_offset,
944944
byte_len: byte_len,
@@ -1002,9 +1002,9 @@ pub mod node {
10021002
return node;
10031003
}
10041004
let byte_offset =
1005-
str::count_bytes(*x.content, 0u, char_offset);
1005+
x.content.slice_chars(0, char_offset).len();
10061006
let byte_len =
1007-
str::count_bytes(*x.content, byte_offset, char_len);
1007+
x.content.slice_from(byte_offset).slice_chars(0, char_len).len();
10081008
return @Leaf(Leaf {
10091009
byte_offset: byte_offset,
10101010
byte_len: byte_len,
@@ -1312,7 +1312,7 @@ mod tests {
13121312
let sample = @~"0123456789ABCDE";
13131313
let r = of_str(sample);
13141314

1315-
assert_eq!(char_len(r), str::char_len(*sample));
1315+
assert_eq!(char_len(r), sample.char_len());
13161316
assert!(rope_to_string(r) == *sample);
13171317
}
13181318

@@ -1328,7 +1328,7 @@ mod tests {
13281328
}
13291329
let sample = @copy *buf;
13301330
let r = of_str(sample);
1331-
assert!(char_len(r) == str::char_len(*sample));
1331+
assert_eq!(char_len(r), sample.char_len());
13321332
assert!(rope_to_string(r) == *sample);
13331333

13341334
let mut string_iter = 0u;
@@ -1374,7 +1374,7 @@ mod tests {
13741374
}
13751375
}
13761376

1377-
assert_eq!(len, str::char_len(*sample));
1377+
assert_eq!(len, sample.char_len());
13781378
}
13791379

13801380
#[test]

src/libextra/terminfo/searcher.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ pub fn get_dbpath_for_term(term: &str) -> Option<~path> {
2727
let homedir = os::homedir();
2828

2929
let mut dirs_to_search = ~[];
30-
let first_char = term.substr(0, 1);
30+
let first_char = term.char_at(0);
3131

3232
// Find search directory
3333
match getenv("TERMINFO") {
@@ -57,12 +57,12 @@ pub fn get_dbpath_for_term(term: &str) -> Option<~path> {
5757

5858
// Look for the terminal in all of the search directories
5959
for dirs_to_search.each |p| {
60-
let newp = ~p.push_many(&[first_char.to_owned(), term.to_owned()]);
60+
let newp = ~p.push_many(&[str::from_char(first_char), term.to_owned()]);
6161
if os::path_exists(p) && os::path_exists(newp) {
6262
return Some(newp);
6363
}
6464
// on some installations the dir is named after the hex of the char (e.g. OS X)
65-
let newp = ~p.push_many(&[fmt!("%x", first_char[0] as uint), term.to_owned()]);
65+
let newp = ~p.push_many(&[fmt!("%x", first_char as uint), term.to_owned()]);
6666
if os::path_exists(p) && os::path_exists(newp) {
6767
return Some(newp);
6868
}

src/librustc/driver/driver.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,7 @@ pub fn build_session_options(binary: @~str,
590590

591591
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
592592
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
593-
let level_short = level_name.substr(0,1);
593+
let level_short = level_name.slice_chars(0, 1);
594594
let level_short = level_short.to_ascii().to_upper().to_str_ascii();
595595
let flags = vec::append(getopts::opt_strs(matches, level_short),
596596
getopts::opt_strs(matches, level_name));

src/librustc/middle/resolve.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2678,14 +2678,14 @@ impl Resolver {
26782678
match module_prefix_result {
26792679
Failed => {
26802680
let mpath = self.idents_to_str(module_path);
2681-
match self.idents_to_str(module_path).rfind(':') {
2681+
match mpath.rfind(':') {
26822682
Some(idx) => {
26832683
self.session.span_err(span, fmt!("unresolved import: could not find `%s` \
2684-
in `%s`", mpath.substr(idx,
2685-
mpath.len() - idx),
2686-
// idx - 1 to account for the extra
2687-
// colon
2688-
mpath.substr(0, idx - 1)));
2684+
in `%s`",
2685+
// idx +- 1 to account for the colons
2686+
// on either side
2687+
mpath.slice_from(idx + 1),
2688+
mpath.slice_to(idx - 1)));
26892689
},
26902690
None => (),
26912691
};

src/librustc/middle/trans/expr.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1704,5 +1704,5 @@ fn trans_assign_op(bcx: block,
17041704
}
17051705

17061706
fn shorten(x: ~str) -> ~str {
1707-
if x.len() > 60 { x.substr(0, 60).to_owned() } else { x }
1707+
if x.char_len() > 60 { x.slice_chars(0, 60).to_owned() } else { x }
17081708
}

src/librusti/rusti.rc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ fn run_cmd(repl: &mut Repl, _in: @io::Reader, _out: @io::Writer,
284284
for args.each |arg| {
285285
let (crate, filename) =
286286
if arg.ends_with(".rs") || arg.ends_with(".rc") {
287-
(arg.substr(0, arg.len() - 3).to_owned(), copy *arg)
287+
(arg.slice_to(arg.len() - 3).to_owned(), copy *arg)
288288
} else {
289289
(copy *arg, arg + ".rs")
290290
};
@@ -342,7 +342,8 @@ pub fn run_line(repl: &mut Repl, in: @io::Reader, out: @io::Writer, line: ~str,
342342
// FIXME #5898: conflicts with Cell.take(), so can't be at the top level
343343
use core::iterator::IteratorUtil;
344344

345-
let full = line.substr(1, line.len() - 1);
345+
// drop the : and the \n (one byte each)
346+
let full = line.slice(1, line.len() - 1);
346347
let split: ~[~str] = full.word_iter().transform(|s| s.to_owned()).collect();
347348
let len = split.len();
348349

src/libstd/str.rs

Lines changed: 44 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -848,15 +848,6 @@ fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
848848
return true;
849849
}
850850

851-
852-
/*
853-
Section: String properties
854-
*/
855-
856-
/// Returns the number of characters that a string holds
857-
#[inline(always)]
858-
pub fn char_len(s: &str) -> uint { count_chars(s, 0u, s.len()) }
859-
860851
/*
861852
Section: Misc
862853
*/
@@ -974,46 +965,6 @@ pub fn with_capacity(capacity: uint) -> ~str {
974965
buf
975966
}
976967

977-
/**
978-
* As char_len but for a slice of a string
979-
*
980-
* # Arguments
981-
*
982-
* * s - A valid string
983-
* * start - The position inside `s` where to start counting in bytes
984-
* * end - The position where to stop counting
985-
*
986-
* # Return value
987-
*
988-
* The number of Unicode characters in `s` between the given indices.
989-
*/
990-
pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
991-
assert!(s.is_char_boundary(start));
992-
assert!(s.is_char_boundary(end));
993-
let mut (i, len) = (start, 0u);
994-
while i < end {
995-
let next = s.char_range_at(i).next;
996-
len += 1u;
997-
i = next;
998-
}
999-
return len;
1000-
}
1001-
1002-
/// Counts the number of bytes taken by the first `n` chars in `s`
1003-
/// starting from `start`.
1004-
pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
1005-
assert!(s.is_char_boundary(start));
1006-
let mut (end, cnt) = (start, n);
1007-
let l = s.len();
1008-
while cnt > 0u {
1009-
assert!(end < l);
1010-
let next = s.char_range_at(end).next;
1011-
cnt -= 1u;
1012-
end = next;
1013-
}
1014-
end - start
1015-
}
1016-
1017968
/// Given a first byte, determine how many bytes are in this UTF-8 character
1018969
pub fn utf8_char_width(b: u8) -> uint {
1019970
let byte: uint = b as uint;
@@ -1394,11 +1345,14 @@ pub trait StrSlice<'self> {
13941345
fn is_alphanumeric(&self) -> bool;
13951346
fn len(&self) -> uint;
13961347
fn char_len(&self) -> uint;
1348+
13971349
fn slice(&self, begin: uint, end: uint) -> &'self str;
13981350
fn slice_from(&self, begin: uint) -> &'self str;
13991351
fn slice_to(&self, end: uint) -> &'self str;
1352+
1353+
fn slice_chars(&self, begin: uint, end: uint) -> &'self str;
1354+
14001355
fn starts_with(&self, needle: &str) -> bool;
1401-
fn substr(&self, begin: uint, n: uint) -> &'self str;
14021356
fn escape_default(&self) -> ~str;
14031357
fn escape_unicode(&self) -> ~str;
14041358
fn trim(&self) -> &'self str;
@@ -1595,7 +1549,8 @@ impl<'self> StrSlice<'self> for &'self str {
15951549
}
15961550
/// Returns the number of characters that a string holds
15971551
#[inline]
1598-
fn char_len(&self) -> uint { char_len(*self) }
1552+
fn char_len(&self) -> uint { self.iter().count() }
1553+
15991554
/**
16001555
* Returns a slice of the given string from the byte range
16011556
* [`begin`..`end`)
@@ -1626,6 +1581,32 @@ impl<'self> StrSlice<'self> for &'self str {
16261581
fn slice_to(&self, end: uint) -> &'self str {
16271582
self.slice(0, end)
16281583
}
1584+
1585+
/// Returns a slice of the string from the char range
1586+
/// [`begin`..`end`).
1587+
///
1588+
/// Fails if `begin` > `end`, or if either `begin` or `end` is
1589+
/// beyond the last character of the string.
1590+
fn slice_chars(&self, begin: uint, end: uint) -> &'self str {
1591+
assert!(begin <= end);
1592+
// not sure how to use the iterators for this nicely.
1593+
let mut (position, count) = (0, 0);
1594+
let l = self.len();
1595+
while count < begin && position < l {
1596+
position = self.char_range_at(position).next;
1597+
count += 1;
1598+
}
1599+
if count < begin { fail!("Attempted to begin slice_chars beyond end of string") }
1600+
let start_byte = position;
1601+
while count < end && position < l {
1602+
position = self.char_range_at(position).next;
1603+
count += 1;
1604+
}
1605+
if count < end { fail!("Attempted to end slice_chars beyond end of string") }
1606+
1607+
self.slice(start_byte, position)
1608+
}
1609+
16291610
/// Returns true if `needle` is a prefix of the string.
16301611
fn starts_with<'a>(&self, needle: &'a str) -> bool {
16311612
let (self_len, needle_len) = (self.len(), needle.len());
@@ -1641,16 +1622,6 @@ impl<'self> StrSlice<'self> for &'self str {
16411622
else { match_at(*self, needle, self_len - needle_len) }
16421623
}
16431624
1644-
/**
1645-
* Take a substring of another.
1646-
*
1647-
* Returns a string containing `n` characters starting at byte offset
1648-
* `begin`.
1649-
*/
1650-
#[inline]
1651-
fn substr(&self, begin: uint, n: uint) -> &'self str {
1652-
self.slice(begin, begin + count_bytes(*self, begin, n))
1653-
}
16541625
/// Escape each char in `s` with char::escape_default.
16551626
#[inline]
16561627
fn escape_default(&self) -> ~str { escape_default(*self) }
@@ -2367,14 +2338,14 @@ mod tests {
23672338
assert_eq!("\u2620".len(), 3u);
23682339
assert_eq!("\U0001d11e".len(), 4u);
23692340

2370-
assert_eq!(char_len(""), 0u);
2371-
assert_eq!(char_len("hello world"), 11u);
2372-
assert_eq!(char_len("\x63"), 1u);
2373-
assert_eq!(char_len("\xa2"), 1u);
2374-
assert_eq!(char_len("\u03c0"), 1u);
2375-
assert_eq!(char_len("\u2620"), 1u);
2376-
assert_eq!(char_len("\U0001d11e"), 1u);
2377-
assert_eq!(char_len("ประเทศไทย中华Việt Nam"), 19u);
2341+
assert_eq!("".char_len(), 0u);
2342+
assert_eq!("hello world".char_len(), 11u);
2343+
assert_eq!("\x63".char_len(), 1u);
2344+
assert_eq!("\xa2".char_len(), 1u);
2345+
assert_eq!("\u03c0".char_len(), 1u);
2346+
assert_eq!("\u2620".char_len(), 1u);
2347+
assert_eq!("\U0001d11e".char_len(), 1u);
2348+
assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
23782349
}
23792350

23802351
#[test]
@@ -2509,13 +2480,13 @@ mod tests {
25092480
}
25102481
25112482
#[test]
2512-
fn test_substr() {
2513-
fn t(a: &str, b: &str, start: int) {
2514-
assert_eq!(a.substr(start as uint, b.len()), b);
2483+
fn test_slice_chars() {
2484+
fn t(a: &str, b: &str, start: uint) {
2485+
assert_eq!(a.slice_chars(start, start + b.char_len()), b);
25152486
}
25162487
t("hello", "llo", 2);
25172488
t("hello", "el", 1);
2518-
assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".substr(6u, 6u));
2489+
assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
25192490
}
25202491
25212492
#[test]

src/libstd/unstable/extfmt.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ pub mod ct {
325325
'o' => TyOctal,
326326
'f' => TyFloat,
327327
'?' => TyPoly,
328-
_ => err(~"unknown type in conversion: " + s.substr(i, 1))
328+
_ => err(fmt!("unknown type in conversion: %c", s.char_at(i)))
329329
};
330330

331331
Parsed::new(t, i + 1)
@@ -546,7 +546,7 @@ pub mod rt {
546546
// displayed
547547
let unpadded = match cv.precision {
548548
CountImplied => s,
549-
CountIs(max) => if (max as uint) < str::char_len(s) {
549+
CountIs(max) => if (max as uint) < s.char_len() {
550550
s.slice(0, max as uint)
551551
} else {
552552
s
@@ -584,7 +584,7 @@ pub mod rt {
584584
~""
585585
} else {
586586
let s = uint::to_str_radix(num, radix);
587-
let len = str::char_len(s);
587+
let len = s.char_len();
588588
if len < prec {
589589
let diff = prec - len;
590590
let pad = str::from_chars(vec::from_elem(diff, '0'));
@@ -614,7 +614,7 @@ pub mod rt {
614614
}
615615
CountIs(width) => { width as uint }
616616
};
617-
let strlen = str::char_len(s) + headsize;
617+
let strlen = s.char_len() + headsize;
618618
if uwidth <= strlen {
619619
for head.iter().advance |&c| {
620620
buf.push_char(c);

0 commit comments

Comments
 (0)