Skip to content

Commit b21b480

Browse files
author
Jakub Bukaj
committed
rollup merge of #19194: aturon/stab-ascii
This is an initial API stabilization pass for `std::ascii`. Aside from some renaming to match conversion conventions, and deprecations in favor of using iterators directly, almost nothing is changed here. However, the static case conversion tables that were previously public are now private. The stabilization of the (rather large!) set of extension traits is left to a follow-up pass, because we hope to land some more general machinery that will provide the same functionality without custom traits. [breaking-change]
2 parents 77d1f0b + 6733d8b commit b21b480

File tree

5 files changed

+79
-31
lines changed

5 files changed

+79
-31
lines changed

src/compiletest/errors.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11+
use std::ascii::AsciiExt;
1112
use std::io::{BufferedReader, File};
1213
use regex::Regex;
1314

@@ -31,7 +32,7 @@ pub fn load_errors(re: &Regex, testfile: &Path) -> Vec<ExpectedError> {
3132
fn parse_expected(line_num: uint, line: &str, re: &Regex) -> Option<ExpectedError> {
3233
re.captures(line).and_then(|caps| {
3334
let adjusts = caps.name("adjusts").len();
34-
let kind = caps.name("kind").to_ascii().to_lowercase().into_string();
35+
let kind = caps.name("kind").to_ascii_lower();
3536
let msg = caps.name("msg").trim().to_string();
3637

3738
debug!("line={} kind={} msg={}", line_num, kind, msg);

src/librustdoc/html/markdown.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#![allow(non_camel_case_types)]
2929

3030
use libc;
31+
use std::ascii::AsciiExt;
3132
use std::cell::{RefCell, Cell};
3233
use std::fmt;
3334
use std::slice;
@@ -223,12 +224,8 @@ pub fn render(w: &mut fmt::Formatter, s: &str, print_toc: bool) -> fmt::Result {
223224
};
224225

225226
// Transform the contents of the header into a hyphenated string
226-
let id = s.as_slice().words().map(|s| {
227-
match s.to_ascii_opt() {
228-
Some(s) => s.to_lowercase().into_string(),
229-
None => s.to_string()
230-
}
231-
}).collect::<Vec<String>>().connect("-");
227+
let id = s.as_slice().words().map(|s| s.to_ascii_lower())
228+
.collect::<Vec<String>>().connect("-");
232229

233230
// This is a terrible hack working around how hoedown gives us rendered
234231
// html for text rather than the raw text.

src/libstd/ascii.rs

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212

1313
//! Operations on ASCII strings and characters
1414
15-
#![experimental]
15+
#![unstable = "unsure about placement and naming"]
16+
#![allow(deprecated)]
1617

1718
use core::kinds::Sized;
1819
use fmt;
@@ -31,30 +32,47 @@ pub struct Ascii { chr: u8 }
3132
impl Ascii {
3233
/// Converts an ascii character into a `u8`.
3334
#[inline]
34-
pub fn to_byte(self) -> u8 {
35+
#[unstable = "recently renamed"]
36+
pub fn as_byte(&self) -> u8 {
3537
self.chr
3638
}
3739

40+
/// Deprecated: use `as_byte` instead.
41+
#[deprecated = "use as_byte"]
42+
pub fn to_byte(self) -> u8 {
43+
self.as_byte()
44+
}
45+
3846
/// Converts an ascii character into a `char`.
3947
#[inline]
40-
pub fn to_char(self) -> char {
48+
#[unstable = "recently renamed"]
49+
pub fn as_char(&self) -> char {
4150
self.chr as char
4251
}
4352

53+
/// Deprecated: use `as_char` instead.
54+
#[deprecated = "use as_char"]
55+
pub fn to_char(self) -> char {
56+
self.as_char()
57+
}
58+
4459
/// Convert to lowercase.
4560
#[inline]
46-
pub fn to_lowercase(self) -> Ascii {
61+
#[stable]
62+
pub fn to_lowercase(&self) -> Ascii {
4763
Ascii{chr: ASCII_LOWER_MAP[self.chr as uint]}
4864
}
4965

5066
/// Convert to uppercase.
5167
#[inline]
52-
pub fn to_uppercase(self) -> Ascii {
68+
#[stable]
69+
pub fn to_uppercase(&self) -> Ascii {
5370
Ascii{chr: ASCII_UPPER_MAP[self.chr as uint]}
5471
}
5572

5673
/// Compares two ascii characters for equality, ignoring case.
5774
#[inline]
75+
#[deprecated = "normalize with to_lowercase"]
5876
pub fn eq_ignore_case(self, other: Ascii) -> bool {
5977
ASCII_LOWER_MAP[self.chr as uint] == ASCII_LOWER_MAP[other.chr as uint]
6078
}
@@ -63,66 +81,77 @@ impl Ascii {
6381

6482
/// Check if the character is a letter (a-z, A-Z)
6583
#[inline]
84+
#[stable]
6685
pub fn is_alphabetic(&self) -> bool {
6786
(self.chr >= 0x41 && self.chr <= 0x5A) || (self.chr >= 0x61 && self.chr <= 0x7A)
6887
}
6988

7089
/// Check if the character is a number (0-9)
7190
#[inline]
91+
#[unstable = "may be renamed"]
7292
pub fn is_digit(&self) -> bool {
7393
self.chr >= 0x30 && self.chr <= 0x39
7494
}
7595

7696
/// Check if the character is a letter or number
7797
#[inline]
98+
#[stable]
7899
pub fn is_alphanumeric(&self) -> bool {
79100
self.is_alphabetic() || self.is_digit()
80101
}
81102

82103
/// Check if the character is a space or horizontal tab
83104
#[inline]
105+
#[experimental = "likely to be removed"]
84106
pub fn is_blank(&self) -> bool {
85107
self.chr == b' ' || self.chr == b'\t'
86108
}
87109

88110
/// Check if the character is a control character
89111
#[inline]
112+
#[stable]
90113
pub fn is_control(&self) -> bool {
91114
self.chr < 0x20 || self.chr == 0x7F
92115
}
93116

94117
/// Checks if the character is printable (except space)
95118
#[inline]
119+
#[experimental = "unsure about naming, or whether this is needed"]
96120
pub fn is_graph(&self) -> bool {
97121
(self.chr - 0x21) < 0x5E
98122
}
99123

100124
/// Checks if the character is printable (including space)
101125
#[inline]
126+
#[unstable = "unsure about naming"]
102127
pub fn is_print(&self) -> bool {
103128
(self.chr - 0x20) < 0x5F
104129
}
105130

106-
/// Checks if the character is lowercase
131+
/// Checks if the character is alphabetic and lowercase
107132
#[inline]
133+
#[stable]
108134
pub fn is_lowercase(&self) -> bool {
109135
(self.chr - b'a') < 26
110136
}
111137

112-
/// Checks if the character is uppercase
138+
/// Checks if the character is alphabetic and uppercase
113139
#[inline]
140+
#[stable]
114141
pub fn is_uppercase(&self) -> bool {
115142
(self.chr - b'A') < 26
116143
}
117144

118145
/// Checks if the character is punctuation
119146
#[inline]
147+
#[stable]
120148
pub fn is_punctuation(&self) -> bool {
121149
self.is_graph() && !self.is_alphanumeric()
122150
}
123151

124152
/// Checks if the character is a valid hex digit
125153
#[inline]
154+
#[stable]
126155
pub fn is_hex(&self) -> bool {
127156
self.is_digit() || ((self.chr | 32u8) - b'a') < 6
128157
}
@@ -135,6 +164,7 @@ impl<'a> fmt::Show for Ascii {
135164
}
136165

137166
/// Trait for converting into an ascii type.
167+
#[experimental = "may be replaced by generic conversion traits"]
138168
pub trait AsciiCast<T> {
139169
/// Convert to an ascii type, panic on non-ASCII input.
140170
#[inline]
@@ -160,6 +190,7 @@ pub trait AsciiCast<T> {
160190
fn is_ascii(&self) -> bool;
161191
}
162192

193+
#[experimental = "may be replaced by generic conversion traits"]
163194
impl<'a> AsciiCast<&'a[Ascii]> for &'a [u8] {
164195
#[inline]
165196
unsafe fn to_ascii_nocheck(&self) -> &'a[Ascii] {
@@ -175,6 +206,7 @@ impl<'a> AsciiCast<&'a[Ascii]> for &'a [u8] {
175206
}
176207
}
177208

209+
#[experimental = "may be replaced by generic conversion traits"]
178210
impl<'a> AsciiCast<&'a [Ascii]> for &'a str {
179211
#[inline]
180212
unsafe fn to_ascii_nocheck(&self) -> &'a [Ascii] {
@@ -187,6 +219,7 @@ impl<'a> AsciiCast<&'a [Ascii]> for &'a str {
187219
}
188220
}
189221

222+
#[experimental = "may be replaced by generic conversion traits"]
190223
impl AsciiCast<Ascii> for u8 {
191224
#[inline]
192225
unsafe fn to_ascii_nocheck(&self) -> Ascii {
@@ -199,6 +232,7 @@ impl AsciiCast<Ascii> for u8 {
199232
}
200233
}
201234

235+
#[experimental = "may be replaced by generic conversion traits"]
202236
impl AsciiCast<Ascii> for char {
203237
#[inline]
204238
unsafe fn to_ascii_nocheck(&self) -> Ascii {
@@ -212,6 +246,7 @@ impl AsciiCast<Ascii> for char {
212246
}
213247

214248
/// Trait for copyless casting to an ascii vector.
249+
#[experimental = "may be replaced by generic conversion traits"]
215250
pub trait OwnedAsciiCast {
216251
/// Check if convertible to ascii
217252
fn is_ascii(&self) -> bool;
@@ -241,6 +276,7 @@ pub trait OwnedAsciiCast {
241276
unsafe fn into_ascii_nocheck(self) -> Vec<Ascii>;
242277
}
243278

279+
#[experimental = "may be replaced by generic conversion traits"]
244280
impl OwnedAsciiCast for String {
245281
#[inline]
246282
fn is_ascii(&self) -> bool {
@@ -253,6 +289,7 @@ impl OwnedAsciiCast for String {
253289
}
254290
}
255291

292+
#[experimental = "may be replaced by generic conversion traits"]
256293
impl OwnedAsciiCast for Vec<u8> {
257294
#[inline]
258295
fn is_ascii(&self) -> bool {
@@ -274,6 +311,7 @@ impl OwnedAsciiCast for Vec<u8> {
274311

275312
/// Trait for converting an ascii type to a string. Needed to convert
276313
/// `&[Ascii]` to `&str`.
314+
#[experimental = "may be replaced by generic conversion traits"]
277315
pub trait AsciiStr for Sized? {
278316
/// Convert to a string.
279317
fn as_str_ascii<'a>(&'a self) -> &'a str;
@@ -283,19 +321,23 @@ pub trait AsciiStr for Sized? {
283321
fn to_lower(&self) -> Vec<Ascii>;
284322

285323
/// Convert to vector representing a lower cased ascii string.
324+
#[deprecated = "use iterators instead"]
286325
fn to_lowercase(&self) -> Vec<Ascii>;
287326

288327
/// Deprecated: use `to_uppercase`
289328
#[deprecated="renamed `to_uppercase`"]
290329
fn to_upper(&self) -> Vec<Ascii>;
291330

292331
/// Convert to vector representing an upper cased ascii string.
332+
#[deprecated = "use iterators instead"]
293333
fn to_uppercase(&self) -> Vec<Ascii>;
294334

295335
/// Compares two Ascii strings ignoring case.
336+
#[deprecated = "use iterators instead"]
296337
fn eq_ignore_case(&self, other: &[Ascii]) -> bool;
297338
}
298339

340+
#[experimental = "may be replaced by generic conversion traits"]
299341
impl AsciiStr for [Ascii] {
300342
#[inline]
301343
fn as_str_ascii<'a>(&'a self) -> &'a str {
@@ -336,11 +378,13 @@ impl IntoString for Vec<Ascii> {
336378
}
337379

338380
/// Trait to convert to an owned byte vector by consuming self
381+
#[experimental = "may be replaced by generic conversion traits"]
339382
pub trait IntoBytes {
340383
/// Converts to an owned byte vector by consuming self
341384
fn into_bytes(self) -> Vec<u8>;
342385
}
343386

387+
#[experimental = "may be replaced by generic conversion traits"]
344388
impl IntoBytes for Vec<Ascii> {
345389
fn into_bytes(self) -> Vec<u8> {
346390
unsafe {
@@ -358,6 +402,7 @@ impl IntoBytes for Vec<Ascii> {
358402

359403

360404
/// Extension methods for ASCII-subset only operations on owned strings
405+
#[experimental = "would prefer to do this in a more general way"]
361406
pub trait OwnedAsciiExt {
362407
/// Convert the string to ASCII upper case:
363408
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
@@ -371,6 +416,7 @@ pub trait OwnedAsciiExt {
371416
}
372417

373418
/// Extension methods for ASCII-subset only operations on string slices
419+
#[experimental = "would prefer to do this in a more general way"]
374420
pub trait AsciiExt<T> for Sized? {
375421
/// Makes a copy of the string in ASCII upper case:
376422
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
@@ -388,6 +434,7 @@ pub trait AsciiExt<T> for Sized? {
388434
fn eq_ignore_ascii_case(&self, other: &Self) -> bool;
389435
}
390436

437+
#[experimental = "would prefer to do this in a more general way"]
391438
impl AsciiExt<String> for str {
392439
#[inline]
393440
fn to_ascii_upper(&self) -> String {
@@ -407,6 +454,7 @@ impl AsciiExt<String> for str {
407454
}
408455
}
409456

457+
#[experimental = "would prefer to do this in a more general way"]
410458
impl OwnedAsciiExt for String {
411459
#[inline]
412460
fn into_ascii_upper(self) -> String {
@@ -421,6 +469,7 @@ impl OwnedAsciiExt for String {
421469
}
422470
}
423471

472+
#[experimental = "would prefer to do this in a more general way"]
424473
impl AsciiExt<Vec<u8>> for [u8] {
425474
#[inline]
426475
fn to_ascii_upper(&self) -> Vec<u8> {
@@ -443,6 +492,7 @@ impl AsciiExt<Vec<u8>> for [u8] {
443492
}
444493
}
445494

495+
#[experimental = "would prefer to do this in a more general way"]
446496
impl OwnedAsciiExt for Vec<u8> {
447497
#[inline]
448498
fn into_ascii_upper(mut self) -> Vec<u8> {
@@ -472,6 +522,7 @@ impl OwnedAsciiExt for Vec<u8> {
472522
/// - Any other chars in the range [0x20,0x7e] are not escaped.
473523
/// - Any other chars are given hex escapes.
474524
/// - Unicode escapes are never generated by this function.
525+
#[unstable = "needs to be updated to use an iterator"]
475526
pub fn escape_default(c: u8, f: |u8|) {
476527
match c {
477528
b'\t' => { f(b'\\'); f(b't'); }
@@ -494,7 +545,7 @@ pub fn escape_default(c: u8, f: |u8|) {
494545
}
495546
}
496547

497-
pub static ASCII_LOWER_MAP: [u8, ..256] = [
548+
static ASCII_LOWER_MAP: [u8, ..256] = [
498549
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
499550
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
500551
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
@@ -533,7 +584,7 @@ pub static ASCII_LOWER_MAP: [u8, ..256] = [
533584
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
534585
];
535586

536-
pub static ASCII_UPPER_MAP: [u8, ..256] = [
587+
static ASCII_UPPER_MAP: [u8, ..256] = [
537588
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
538589
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
539590
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,

0 commit comments

Comments
 (0)