|
71 | 71 |
|
72 | 72 | // Searching
|
73 | 73 | index,
|
74 |
| - //rindex, |
| 74 | + rindex, |
75 | 75 | index_byte,
|
76 | 76 | rindex_byte,
|
77 | 77 | find,
|
@@ -255,15 +255,12 @@ Function: pop_char
|
255 | 255 | Remove the final character from a string and return it.
|
256 | 256 |
|
257 | 257 | Failure:
|
258 |
| -
|
259 | 258 | If the string does not contain any characters.
|
260 | 259 | */
|
261 | 260 | fn pop_char(&s: str) -> char unsafe {
|
262 | 261 | let end = byte_len(s);
|
263 |
| - while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; } |
264 |
| - assert (end > 0u); |
265 |
| - let ch = char_at(s, end - 1u); |
266 |
| - s = unsafe::slice_bytes(s, 0u, end - 1u); |
| 262 | + let {ch:ch, prev:end} = char_range_at_reverse(s, end); |
| 263 | + s = unsafe::slice_bytes(s, 0u, end); |
267 | 264 | ret ch;
|
268 | 265 | }
|
269 | 266 |
|
@@ -894,6 +891,28 @@ fn index(ss: str, cc: char) -> option<uint> {
|
894 | 891 | ret option::none;
|
895 | 892 | }
|
896 | 893 |
|
| 894 | +// Function: rindex |
| 895 | +// |
| 896 | +// Returns the char index of the last matching char |
| 897 | +// (as option some/none) |
| 898 | +fn rindex(ss: str, cc: char) -> option<uint> { |
| 899 | + let bii = byte_len(ss); |
| 900 | + let cii = char_len(ss); |
| 901 | + while bii > 0u { |
| 902 | + let {ch, prev} = char_range_at_reverse(ss, bii); |
| 903 | + cii -= 1u; |
| 904 | + bii = prev; |
| 905 | + |
| 906 | + // found here? |
| 907 | + if ch == cc { |
| 908 | + ret option::some(cii); |
| 909 | + } |
| 910 | + } |
| 911 | + |
| 912 | + // wasn't found |
| 913 | + ret option::none; |
| 914 | +} |
| 915 | + |
897 | 916 | /*
|
898 | 917 | Function: index
|
899 | 918 |
|
@@ -1259,6 +1278,25 @@ Pluck a character out of a string
|
1259 | 1278 | */
|
1260 | 1279 | fn char_at(s: str, i: uint) -> char { ret char_range_at(s, i).ch; }
|
1261 | 1280 |
|
| 1281 | +// Function: char_range_at_reverse |
| 1282 | +// |
| 1283 | +// Given a str and a byte position, return the previous char and the byte index of that char's initial byte. |
| 1284 | +// This function can be used to iterate over a unicode string in reverse. |
| 1285 | +fn char_range_at_reverse(ss: str, start: uint) -> {ch: char, prev: uint} { |
| 1286 | + let prev = start; |
| 1287 | + |
| 1288 | + // step back while the previous byte is a UTF-8 continuation byte (10......) |
| 1289 | + while prev > 0u && ss[prev - 1u] & 192u8 == tag_cont_u8 { |
| 1290 | + prev -= 1u; |
| 1291 | + } |
| 1292 | + |
| 1293 | + // now refer to the initial byte of previous char (NOTE(review): when start is 0u, prev is still 0u here; confirm the intended failure mode) |
| 1294 | + prev -= 1u; |
| 1295 | + |
| 1296 | + let ch = char_at(ss, prev); |
| 1297 | + ret {ch:ch, prev:prev}; |
| 1298 | +} |
| 1299 | + |
1262 | 1300 | /*
|
1263 | 1301 | Function: substr_all
|
1264 | 1302 |
|
@@ -1468,18 +1506,58 @@ mod tests {
|
1468 | 1506 | }
|
1469 | 1507 |
|
1470 | 1508 | #[test]
|
1471 |
| - fn test_index_and_rindex() { |
1472 |
| - assert (index_byte("hello", 'e' as u8) == 1); |
1473 |
| - assert (index_byte("hello", 'o' as u8) == 4); |
1474 |
| - assert (index_byte("hello", 'z' as u8) == -1); |
1475 |
| - assert (index("hello", 'e') == option::some(1u)); |
1476 |
| - assert (index("hello", 'o') == option::some(4u)); |
1477 |
| - assert (index("hello", 'z') == option::none); |
| 1509 | + fn test_index() { |
| 1510 | + assert ( index("hello", 'h') == option::some(0u)); |
| 1511 | + assert ( index("hello", 'e') == option::some(1u)); |
| 1512 | + assert ( index("hello", 'o') == option::some(4u)); |
| 1513 | + assert ( index("hello", 'z') == option::none); |
| 1514 | + } |
| 1515 | + |
| 1516 | + #[test] |
| 1517 | + fn test_rindex() { |
| 1518 | + assert (rindex("hello", 'l') == option::some(3u)); |
| 1519 | + assert (rindex("hello", 'o') == option::some(4u)); |
| 1520 | + assert (rindex("hello", 'h') == option::some(0u)); |
| 1521 | + assert (rindex("hello", 'z') == option::none); |
| 1522 | + } |
| 1523 | + |
| 1524 | + #[test] |
| 1525 | + fn test_index_byte() { |
| 1526 | + assert ( index_byte("hello", 'e' as u8) == 1); |
| 1527 | + assert ( index_byte("hello", 'o' as u8) == 4); |
| 1528 | + assert ( index_byte("hello", 'z' as u8) == -1); |
| 1529 | + } |
| 1530 | + |
| 1531 | + #[test] |
| 1532 | + fn test_rindex_byte() { |
1478 | 1533 | assert (rindex_byte("hello", 'l' as u8) == 3);
|
1479 | 1534 | assert (rindex_byte("hello", 'h' as u8) == 0);
|
1480 | 1535 | assert (rindex_byte("hello", 'z' as u8) == -1);
|
1481 | 1536 | }
|
1482 | 1537 |
|
| 1538 | + #[test] |
| 1539 | + fn test_pop_char() { |
| 1540 | + let data = "ประเทศไทย中华"; |
| 1541 | + let cc = pop_char(data); |
| 1542 | + assert "ประเทศไทย中" == data; |
| 1543 | + assert '华' == cc; |
| 1544 | + } |
| 1545 | + |
| 1546 | + #[test] |
| 1547 | + fn test_pop_char_2() { |
| 1548 | + let data2 = "华"; |
| 1549 | + let cc2 = pop_char(data2); |
| 1550 | + assert "" == data2; |
| 1551 | + assert '华' == cc2; |
| 1552 | + } |
| 1553 | + |
| 1554 | + #[test] |
| 1555 | + #[should_fail] |
| 1556 | + fn test_pop_char_fail() { |
| 1557 | + let data = ""; |
| 1558 | + let _cc3 = pop_char(data); |
| 1559 | + } |
| 1560 | + |
1483 | 1561 | #[test]
|
1484 | 1562 | fn test_split_byte() {
|
1485 | 1563 | fn t(s: str, c: char, u: [str]) {
|
|
0 commit comments