Skip to content

Commit 7ce2d9c

Browse files
committed
libs: stabilize ascii module
This is an initial API stabilization pass for `std::ascii`. Aside from some renaming to match conversion conventions, and deprecations in favor of using iterators directly, almost nothing is changed here. However, the static case conversion tables that were previously public are now private. The stabilization of the (rather large!) set of extension traits is left to a follow-up pass, because we hope to land some more general machinery that will provide the same functionality without custom traits. [breaking-change]
1 parent c9f6d69 commit 7ce2d9c

File tree

3 files changed

+57
-16
lines changed

3 files changed

+57
-16
lines changed

src/compiletest/errors.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11+
use std::ascii::AsciiExt;
1112
use std::io::{BufferedReader, File};
1213
use regex::Regex;
1314

@@ -31,7 +32,7 @@ pub fn load_errors(re: &Regex, testfile: &Path) -> Vec<ExpectedError> {
3132
fn parse_expected(line_num: uint, line: &str, re: &Regex) -> Option<ExpectedError> {
3233
re.captures(line).and_then(|caps| {
3334
let adjusts = caps.name("adjusts").len();
34-
let kind = caps.name("kind").to_ascii().to_lowercase().into_string();
35+
let kind = caps.name("kind").to_ascii_lower();
3536
let msg = caps.name("msg").trim().to_string();
3637

3738
debug!("line={} kind={} msg={}", line_num, kind, msg);

src/librustdoc/html/markdown.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#![allow(non_camel_case_types)]
2929

3030
use libc;
31+
use std::ascii::AsciiExt;
3132
use std::cell::{RefCell, Cell};
3233
use std::fmt;
3334
use std::slice;
@@ -226,12 +227,8 @@ pub fn render(w: &mut fmt::Formatter, s: &str, print_toc: bool) -> fmt::Result {
226227
};
227228

228229
// Transform the contents of the header into a hyphenated string
229-
let id = s.as_slice().words().map(|s| {
230-
match s.to_ascii_opt() {
231-
Some(s) => s.to_lowercase().into_string(),
232-
None => s.to_string()
233-
}
234-
}).collect::<Vec<String>>().connect("-");
230+
let id = s.as_slice().words().map(|s| s.to_ascii_lower())
231+
.collect::<Vec<String>>().connect("-");
235232

236233
// This is a terrible hack working around how hoedown gives us rendered
237234
// html for text rather than the raw text.

src/libstd/ascii.rs

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
//! Operations on ASCII strings and characters
1414
15-
#![experimental]
15+
#![unstable = "unsure about placement and naming"]
1616

1717
use core::kinds::Sized;
1818
use fmt;
@@ -31,30 +31,40 @@ pub struct Ascii { chr: u8 }
3131
impl Ascii {
3232
/// Converts an ascii character into a `u8`.
3333
#[inline]
34-
pub fn to_byte(self) -> u8 {
34+
#[unstable = "recently renamed"]
35+
pub fn as_byte(&self) -> u8 {
3536
self.chr
3637
}
3738

39+
#[deprecated = "use as_byte"]
40+
pub fn to_byte(self) -> u8 {
41+
self.as_byte()
42+
}
43+
3844
/// Converts an ascii character into a `char`.
3945
#[inline]
40-
pub fn to_char(self) -> char {
46+
#[unstable = "recently renamed"]
47+
pub fn as_char(&self) -> char {
4148
self.chr as char
4249
}
4350

4451
/// Convert to lowercase.
4552
#[inline]
46-
pub fn to_lowercase(self) -> Ascii {
53+
#[stable]
54+
pub fn to_lowercase(&self) -> Ascii {
4755
Ascii{chr: ASCII_LOWER_MAP[self.chr as uint]}
4856
}
4957

5058
/// Convert to uppercase.
5159
#[inline]
52-
pub fn to_uppercase(self) -> Ascii {
60+
#[stable]
61+
pub fn to_uppercase(&self) -> Ascii {
5362
Ascii{chr: ASCII_UPPER_MAP[self.chr as uint]}
5463
}
5564

5665
/// Compares two ascii characters for equality, ignoring case.
5766
#[inline]
67+
#[deprecated = "normalize with to_lowercase"]
5868
pub fn eq_ignore_case(self, other: Ascii) -> bool {
5969
ASCII_LOWER_MAP[self.chr as uint] == ASCII_LOWER_MAP[other.chr as uint]
6070
}
@@ -63,66 +73,77 @@ impl Ascii {
6373

6474
/// Check if the character is a letter (a-z, A-Z)
6575
#[inline]
76+
#[stable]
6677
pub fn is_alphabetic(&self) -> bool {
6778
(self.chr >= 0x41 && self.chr <= 0x5A) || (self.chr >= 0x61 && self.chr <= 0x7A)
6879
}
6980

7081
/// Check if the character is a number (0-9)
7182
#[inline]
83+
#[unstable = "may be renamed"]
7284
pub fn is_digit(&self) -> bool {
7385
self.chr >= 0x30 && self.chr <= 0x39
7486
}
7587

7688
/// Check if the character is a letter or number
7789
#[inline]
90+
#[stable]
7891
pub fn is_alphanumeric(&self) -> bool {
7992
self.is_alphabetic() || self.is_digit()
8093
}
8194

8295
/// Check if the character is a space or horizontal tab
8396
#[inline]
97+
#[experimental = "likely to be removed"]
8498
pub fn is_blank(&self) -> bool {
8599
self.chr == b' ' || self.chr == b'\t'
86100
}
87101

88102
/// Check if the character is a control character
89103
#[inline]
104+
#[stable]
90105
pub fn is_control(&self) -> bool {
91106
self.chr < 0x20 || self.chr == 0x7F
92107
}
93108

94109
/// Checks if the character is printable (except space)
95110
#[inline]
111+
#[experimental = "unsure about naming, or whether this is needed"]
96112
pub fn is_graph(&self) -> bool {
97113
(self.chr - 0x21) < 0x5E
98114
}
99115

100116
/// Checks if the character is printable (including space)
101117
#[inline]
118+
#[unstable = "unsure about naming"]
102119
pub fn is_print(&self) -> bool {
103120
(self.chr - 0x20) < 0x5F
104121
}
105122

106-
/// Checks if the character is lowercase
123+
/// Checks if the character is alphabetic and lowercase
107124
#[inline]
125+
#[stable]
108126
pub fn is_lowercase(&self) -> bool {
109127
(self.chr - b'a') < 26
110128
}
111129

112-
/// Checks if the character is uppercase
130+
/// Checks if the character is alphabetic and uppercase
113131
#[inline]
132+
#[stable]
114133
pub fn is_uppercase(&self) -> bool {
115134
(self.chr - b'A') < 26
116135
}
117136

118137
/// Checks if the character is punctuation
119138
#[inline]
139+
#[stable]
120140
pub fn is_punctuation(&self) -> bool {
121141
self.is_graph() && !self.is_alphanumeric()
122142
}
123143

124144
/// Checks if the character is a valid hex digit
125145
#[inline]
146+
#[stable]
126147
pub fn is_hex(&self) -> bool {
127148
self.is_digit() || ((self.chr | 32u8) - b'a') < 6
128149
}
@@ -135,6 +156,7 @@ impl<'a> fmt::Show for Ascii {
135156
}
136157

137158
/// Trait for converting into an ascii type.
159+
#[experimental = "may be replaced by generic conversion traits"]
138160
pub trait AsciiCast<T> {
139161
/// Convert to an ascii type, panic on non-ASCII input.
140162
#[inline]
@@ -160,6 +182,7 @@ pub trait AsciiCast<T> {
160182
fn is_ascii(&self) -> bool;
161183
}
162184

185+
#[experimental = "may be replaced by generic conversion traits"]
163186
impl<'a> AsciiCast<&'a[Ascii]> for &'a [u8] {
164187
#[inline]
165188
unsafe fn to_ascii_nocheck(&self) -> &'a[Ascii] {
@@ -175,6 +198,7 @@ impl<'a> AsciiCast<&'a[Ascii]> for &'a [u8] {
175198
}
176199
}
177200

201+
#[experimental = "may be replaced by generic conversion traits"]
178202
impl<'a> AsciiCast<&'a [Ascii]> for &'a str {
179203
#[inline]
180204
unsafe fn to_ascii_nocheck(&self) -> &'a [Ascii] {
@@ -187,6 +211,7 @@ impl<'a> AsciiCast<&'a [Ascii]> for &'a str {
187211
}
188212
}
189213

214+
#[experimental = "may be replaced by generic conversion traits"]
190215
impl AsciiCast<Ascii> for u8 {
191216
#[inline]
192217
unsafe fn to_ascii_nocheck(&self) -> Ascii {
@@ -199,6 +224,7 @@ impl AsciiCast<Ascii> for u8 {
199224
}
200225
}
201226

227+
#[experimental = "may be replaced by generic conversion traits"]
202228
impl AsciiCast<Ascii> for char {
203229
#[inline]
204230
unsafe fn to_ascii_nocheck(&self) -> Ascii {
@@ -212,6 +238,7 @@ impl AsciiCast<Ascii> for char {
212238
}
213239

214240
/// Trait for copyless casting to an ascii vector.
241+
#[experimental = "may be replaced by generic conversion traits"]
215242
pub trait OwnedAsciiCast {
216243
/// Check if convertible to ascii
217244
fn is_ascii(&self) -> bool;
@@ -241,6 +268,7 @@ pub trait OwnedAsciiCast {
241268
unsafe fn into_ascii_nocheck(self) -> Vec<Ascii>;
242269
}
243270

271+
#[experimental = "may be replaced by generic conversion traits"]
244272
impl OwnedAsciiCast for String {
245273
#[inline]
246274
fn is_ascii(&self) -> bool {
@@ -253,6 +281,7 @@ impl OwnedAsciiCast for String {
253281
}
254282
}
255283

284+
#[experimental = "may be replaced by generic conversion traits"]
256285
impl OwnedAsciiCast for Vec<u8> {
257286
#[inline]
258287
fn is_ascii(&self) -> bool {
@@ -274,6 +303,7 @@ impl OwnedAsciiCast for Vec<u8> {
274303

275304
/// Trait for converting an ascii type to a string. Needed to convert
276305
/// `&[Ascii]` to `&str`.
306+
#[experimental = "may be replaced by generic conversion traits"]
277307
pub trait AsciiStr for Sized? {
278308
/// Convert to a string.
279309
fn as_str_ascii<'a>(&'a self) -> &'a str;
@@ -283,19 +313,23 @@ pub trait AsciiStr for Sized? {
283313
fn to_lower(&self) -> Vec<Ascii>;
284314

285315
/// Convert to vector representing a lower cased ascii string.
316+
#[deprecated = "use iterators instead"]
286317
fn to_lowercase(&self) -> Vec<Ascii>;
287318

288319
/// Deprecated: use `to_uppercase`
289320
#[deprecated="renamed `to_uppercase`"]
290321
fn to_upper(&self) -> Vec<Ascii>;
291322

292323
/// Convert to vector representing an upper cased ascii string.
324+
#[deprecated = "use iterators instead"]
293325
fn to_uppercase(&self) -> Vec<Ascii>;
294326

295327
/// Compares two Ascii strings ignoring case.
328+
#[deprecated = "use iterators instead"]
296329
fn eq_ignore_case(&self, other: &[Ascii]) -> bool;
297330
}
298331

332+
#[experimental = "may be replaced by generic conversion traits"]
299333
impl AsciiStr for [Ascii] {
300334
#[inline]
301335
fn as_str_ascii<'a>(&'a self) -> &'a str {
@@ -338,11 +372,13 @@ impl IntoString for Vec<Ascii> {
338372
}
339373

340374
/// Trait to convert to an owned byte vector by consuming self
375+
#[experimental = "may be replaced by generic conversion traits"]
341376
pub trait IntoBytes {
342377
/// Converts to an owned byte vector by consuming self
343378
fn into_bytes(self) -> Vec<u8>;
344379
}
345380

381+
#[experimental = "may be replaced by generic conversion traits"]
346382
impl IntoBytes for Vec<Ascii> {
347383
fn into_bytes(self) -> Vec<u8> {
348384
unsafe {
@@ -360,6 +396,7 @@ impl IntoBytes for Vec<Ascii> {
360396

361397

362398
/// Extension methods for ASCII-subset only operations on owned strings
399+
#[experimental = "would prefer to do this in a more general way"]
363400
pub trait OwnedAsciiExt {
364401
/// Convert the string to ASCII upper case:
365402
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
@@ -373,6 +410,7 @@ pub trait OwnedAsciiExt {
373410
}
374411

375412
/// Extension methods for ASCII-subset only operations on string slices
413+
#[experimental = "would prefer to do this in a more general way"]
376414
pub trait AsciiExt<T> for Sized? {
377415
/// Makes a copy of the string in ASCII upper case:
378416
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
@@ -390,6 +428,7 @@ pub trait AsciiExt<T> for Sized? {
390428
fn eq_ignore_ascii_case(&self, other: &Self) -> bool;
391429
}
392430

431+
#[experimental = "would prefer to do this in a more general way"]
393432
impl AsciiExt<String> for str {
394433
#[inline]
395434
fn to_ascii_upper(&self) -> String {
@@ -409,6 +448,7 @@ impl AsciiExt<String> for str {
409448
}
410449
}
411450

451+
#[experimental = "would prefer to do this in a more general way"]
412452
impl OwnedAsciiExt for String {
413453
#[inline]
414454
fn into_ascii_upper(self) -> String {
@@ -423,6 +463,7 @@ impl OwnedAsciiExt for String {
423463
}
424464
}
425465

466+
#[experimental = "would prefer to do this in a more general way"]
426467
impl AsciiExt<Vec<u8>> for [u8] {
427468
#[inline]
428469
fn to_ascii_upper(&self) -> Vec<u8> {
@@ -445,6 +486,7 @@ impl AsciiExt<Vec<u8>> for [u8] {
445486
}
446487
}
447488

489+
#[experimental = "would prefer to do this in a more general way"]
448490
impl OwnedAsciiExt for Vec<u8> {
449491
#[inline]
450492
fn into_ascii_upper(mut self) -> Vec<u8> {
@@ -474,6 +516,7 @@ impl OwnedAsciiExt for Vec<u8> {
474516
/// - Any other chars in the range [0x20,0x7e] are not escaped.
475517
/// - Any other chars are given hex escapes.
476518
/// - Unicode escapes are never generated by this function.
519+
#[unstable = "needs to be updated to use an iterator"]
477520
pub fn escape_default(c: u8, f: |u8|) {
478521
match c {
479522
b'\t' => { f(b'\\'); f(b't'); }
@@ -496,7 +539,7 @@ pub fn escape_default(c: u8, f: |u8|) {
496539
}
497540
}
498541

499-
pub static ASCII_LOWER_MAP: [u8, ..256] = [
542+
static ASCII_LOWER_MAP: [u8, ..256] = [
500543
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
501544
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
502545
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
@@ -535,7 +578,7 @@ pub static ASCII_LOWER_MAP: [u8, ..256] = [
535578
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
536579
];
537580

538-
pub static ASCII_UPPER_MAP: [u8, ..256] = [
581+
static ASCII_UPPER_MAP: [u8, ..256] = [
539582
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
540583
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
541584
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,

0 commit comments

Comments
 (0)