|
75 | 75 | rindex,
|
76 | 76 | find,
|
77 | 77 | find_bytes,
|
| 78 | + find_from_bytes, |
78 | 79 | contains,
|
79 | 80 | starts_with,
|
80 | 81 | ends_with,
|
@@ -901,23 +902,33 @@ fn rindex(ss: str, cc: char) -> option<uint> {
|
901 | 902 | //
|
902 | 903 | // Find the byte position of the first instance of one string
|
903 | 904 | // within another, or return option::none
|
| 905 | +fn find_bytes(haystack: str, needle: str) -> option<uint> { |
| 906 | + find_from_bytes(haystack, needle, 0u, len_bytes(haystack)) |
| 907 | +} |
| 908 | + |
| 909 | +//Function: find_from_bytes |
| 910 | +// |
| 911 | +// Find the byte position of the first instance of one string |
| 912 | +// within bytes [start, end) of another, or return option::none |
904 | 913 | //
|
905 | 914 | // FIXME: Boyer-Moore should be significantly faster
|
906 |
| -fn find_bytes(haystack: str, needle: str) -> option<uint> { |
907 |
| - let haystack_len = len_bytes(haystack); |
908 |
| - let needle_len = len_bytes(needle); |
| 915 | +fn find_from_bytes(haystack: str, needle: str, start: uint, end:uint) |
| 916 | + -> option<uint> { |
| 917 | + assert end <= len_bytes(haystack); |
| 918 | + |
| 919 | + let needle_len = len_bytes(needle); |
909 | 920 |
|
910 |
| - if needle_len == 0u { ret some(0u); } |
911 |
| - if needle_len > haystack_len { ret none; } |
| 921 | + if needle_len == 0u { ret some(start); } |
| 922 | + if needle_len > end { ret none; } |
912 | 923 |
|
913 | 924 | fn match_at(haystack: str, needle: str, ii: uint) -> bool {
|
914 | 925 | let jj = ii;
|
915 | 926 | for c: u8 in needle { if haystack[jj] != c { ret false; } jj += 1u; }
|
916 | 927 | ret true;
|
917 | 928 | }
|
918 | 929 |
|
919 |
| - let ii = 0u; |
920 |
| - while ii <= haystack_len - needle_len { |
| 930 | + let ii = start; |
| 931 | + while ii <= end - needle_len { |
921 | 932 | if match_at(haystack, needle, ii) { ret some(ii); }
|
922 | 933 | ii += 1u;
|
923 | 934 | }
|
@@ -1757,6 +1768,34 @@ mod tests {
|
1757 | 1768 | assert (find_bytes(data, "ไท华") == none);
|
1758 | 1769 | }
|
1759 | 1770 |
|
| 1771 | + #[test] |
| 1772 | + fn test_find_from_bytes() { |
| 1773 | + // byte positions |
| 1774 | + assert (find_from_bytes("", "", 0u, 0u) == some(0u)); |
| 1775 | + |
| 1776 | + let data = "abcabc"; |
| 1777 | + assert find_from_bytes(data, "ab", 0u, 6u) == some(0u); |
| 1778 | + assert find_from_bytes(data, "ab", 2u, 6u) == some(3u); |
| 1779 | + assert find_from_bytes(data, "ab", 2u, 4u) == none; |
| 1780 | + |
| 1781 | + let data = "ประเทศไทย中华Việt Nam"; |
| 1782 | + data += data; |
| 1783 | + assert find_from_bytes(data, "", 0u, 43u) == some(0u); |
| 1784 | + assert find_from_bytes(data, "", 6u, 43u) == some(6u); |
| 1785 | + |
| 1786 | + assert find_from_bytes(data, "ประ", 0u, 43u) == some( 0u); |
| 1787 | + assert find_from_bytes(data, "ทศไ", 0u, 43u) == some(12u); |
| 1788 | + assert find_from_bytes(data, "ย中", 0u, 43u) == some(24u); |
| 1789 | + assert find_from_bytes(data, "iệt", 0u, 43u) == some(34u); |
| 1790 | + assert find_from_bytes(data, "Nam", 0u, 43u) == some(40u); |
| 1791 | + |
| 1792 | + assert find_from_bytes(data, "ประ", 43u, 86u) == some(43u); |
| 1793 | + assert find_from_bytes(data, "ทศไ", 43u, 86u) == some(55u); |
| 1794 | + assert find_from_bytes(data, "ย中", 43u, 86u) == some(67u); |
| 1795 | + assert find_from_bytes(data, "iệt", 43u, 86u) == some(77u); |
| 1796 | + assert find_from_bytes(data, "Nam", 43u, 86u) == some(83u); |
| 1797 | + } |
| 1798 | + |
1760 | 1799 | #[test]
|
1761 | 1800 | fn test_find() {
|
1762 | 1801 | // char positions
|
|
0 commit comments