Skip to content

Commit 8069ae1

Browse files
committed
---
yaml --- r: 78801 b: refs/heads/try c: bdd188d h: refs/heads/master i: 78799: e781548 v: v3
1 parent cde640f commit 8069ae1

File tree

2 files changed

+109
-10
lines changed

2 files changed

+109
-10
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
refs/heads/master: 25ed29a0edb3d48fef843a0b818ee68faf2252da
33
refs/heads/snap-stage1: e33de59e47c5076a89eadeb38f4934f58a3618a6
44
refs/heads/snap-stage3: 60fba4d7d677ec098e6a43014132fe99f7547363
5-
refs/heads/try: 24bde7ff5a414973227b27195b919ffbf3a0be7d
5+
refs/heads/try: bdd188d003b56b55f534ad692ea1b17ca569b6fe
66
refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
77
refs/heads/ndm: f3868061cd7988080c30d6d5bf352a5a5fe2460b
88
refs/heads/try2: 147ecfdd8221e4a4d4e090486829a06da1e0ca3c

branches/try/src/libstd/str.rs

Lines changed: 108 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,23 @@ Section: Creating a string
5656
pub fn from_bytes(vv: &[u8]) -> ~str {
5757
use str::not_utf8::cond;
5858

59-
if !is_utf8(vv) {
60-
let first_bad_byte = *vv.iter().find(|&b| !is_utf8([*b])).unwrap();
61-
cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
62-
first_bad_byte as uint))
59+
match from_bytes_opt(vv) {
60+
None => {
61+
let first_bad_byte = *vv.iter().find(|&b| !is_utf8([*b])).unwrap();
62+
cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
63+
first_bad_byte as uint))
64+
}
65+
Some(s) => s
66+
}
67+
}
68+
69+
/// Convert a vector of bytes to a new UTF-8 string, if possible.
70+
/// Returns None if the vector contains invalid UTF-8.
71+
pub fn from_bytes_opt(vv: &[u8]) -> Option<~str> {
72+
if is_utf8(vv) {
73+
Some(unsafe { raw::from_bytes(vv) })
6374
} else {
64-
return unsafe { raw::from_bytes(vv) }
75+
None
6576
}
6677
}
6778

@@ -78,7 +89,17 @@ pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
7889
cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
7990
first_bad_byte as uint))
8091
} else {
81-
return unsafe { raw::from_bytes_owned(vv) }
92+
unsafe { raw::from_bytes_owned(vv) }
93+
}
94+
}
95+
96+
/// Consumes a vector of bytes to create a new UTF-8 string.
97+
/// Returns None if the vector contains invalid UTF-8.
98+
pub fn from_bytes_owned_opt(vv: ~[u8]) -> Option<~str> {
99+
if is_utf8(vv) {
100+
Some(unsafe { raw::from_bytes_owned(vv) })
101+
} else {
102+
None
82103
}
83104
}
84105

@@ -91,8 +112,16 @@ pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
91112
///
92113
/// Fails if invalid UTF-8
93114
pub fn from_bytes_slice<'a>(v: &'a [u8]) -> &'a str {
94-
assert!(is_utf8(v));
95-
unsafe { cast::transmute(v) }
115+
from_bytes_slice_opt(v).expect("from_bytes_slice: not utf-8")
116+
}
117+
118+
/// Converts a vector to a string slice without performing any allocations.
119+
///
120+
/// Returns None if the slice is not valid UTF-8.
121+
pub fn from_bytes_slice_opt<'a>(v: &'a [u8]) -> Option<&'a str> {
122+
if is_utf8(v) {
123+
Some(unsafe { cast::transmute(v) })
124+
} else { None }
96125
}
97126

98127
impl ToStr for ~str {
@@ -2358,7 +2387,7 @@ impl Zero for @str {
23582387
#[cfg(test)]
23592388
mod tests {
23602389
use container::Container;
2361-
use option::Some;
2390+
use option::{None, Some};
23622391
use libc::c_char;
23632392
use libc;
23642393
use ptr;
@@ -3539,6 +3568,76 @@ mod tests {
35393568
let mut s = ~"\u00FC"; // ü
35403569
s.truncate(1);
35413570
}
3571+
3572+
#[test]
3573+
fn test_str_from_bytes_slice() {
3574+
let xs = bytes!("hello");
3575+
assert_eq!(from_bytes_slice(xs), "hello");
3576+
3577+
let xs = bytes!("ศไทย中华Việt Nam");
3578+
assert_eq!(from_bytes_slice(xs), "ศไทย中华Việt Nam");
3579+
}
3580+
3581+
#[test]
3582+
#[should_fail]
3583+
fn test_str_from_bytes_slice_invalid() {
3584+
let xs = bytes!("hello", 0xff);
3585+
let _ = from_bytes_slice(xs);
3586+
}
3587+
3588+
#[test]
3589+
fn test_str_from_bytes_slice_opt() {
3590+
let xs = bytes!("hello");
3591+
assert_eq!(from_bytes_slice_opt(xs), Some("hello"));
3592+
3593+
let xs = bytes!("ศไทย中华Việt Nam");
3594+
assert_eq!(from_bytes_slice_opt(xs), Some("ศไทย中华Việt Nam"));
3595+
3596+
let xs = bytes!("hello", 0xff);
3597+
assert_eq!(from_bytes_slice_opt(xs), None);
3598+
}
3599+
3600+
#[test]
3601+
fn test_str_from_bytes() {
3602+
let xs = bytes!("hello");
3603+
assert_eq!(from_bytes(xs), ~"hello");
3604+
3605+
let xs = bytes!("ศไทย中华Việt Nam");
3606+
assert_eq!(from_bytes(xs), ~"ศไทย中华Việt Nam");
3607+
}
3608+
3609+
#[test]
3610+
fn test_str_from_bytes_opt() {
3611+
let xs = bytes!("hello").to_owned();
3612+
assert_eq!(from_bytes_opt(xs), Some(~"hello"));
3613+
3614+
let xs = bytes!("ศไทย中华Việt Nam");
3615+
assert_eq!(from_bytes_opt(xs), Some(~"ศไทย中华Việt Nam"));
3616+
3617+
let xs = bytes!("hello", 0xff);
3618+
assert_eq!(from_bytes_opt(xs), None);
3619+
}
3620+
3621+
#[test]
3622+
fn test_str_from_bytes_owned() {
3623+
let xs = bytes!("hello").to_owned();
3624+
assert_eq!(from_bytes_owned(xs), ~"hello");
3625+
3626+
let xs = bytes!("ศไทย中华Việt Nam").to_owned();
3627+
assert_eq!(from_bytes_owned(xs), ~"ศไทย中华Việt Nam");
3628+
}
3629+
3630+
#[test]
3631+
fn test_str_from_bytes_owned_opt() {
3632+
let xs = bytes!("hello").to_owned();
3633+
assert_eq!(from_bytes_owned_opt(xs), Some(~"hello"));
3634+
3635+
let xs = bytes!("ศไทย中华Việt Nam").to_owned();
3636+
assert_eq!(from_bytes_owned_opt(xs), Some(~"ศไทย中华Việt Nam"));
3637+
3638+
let xs = bytes!("hello", 0xff).to_owned();
3639+
assert_eq!(from_bytes_owned_opt(xs), None);
3640+
}
35423641
}
35433642
35443643
#[cfg(test)]

0 commit comments

Comments
 (0)