Skip to content

Commit c9ef14a

Browse files
committed
XXX: no escapes in raw strings
1 parent 7e452c1 commit c9ef14a

File tree

1 file changed

+30
-43
lines changed

1 file changed

+30
-43
lines changed

compiler/rustc_ast/src/util/literal.rs

Lines changed: 30 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ impl LitKind {
7777
// new symbol because the string in the LitKind is different to the
7878
// string in the token.
7979
let s = symbol.as_str();
80+
// Vanilla strings are so common we optimize for the common case where no chars
81+
// requiring special behaviour are present.
8082
let symbol = if s.contains(['\\', '\r']) {
8183
let mut buf = String::with_capacity(s.len());
8284
let mut error = Ok(());
@@ -104,27 +106,20 @@ impl LitKind {
104106
LitKind::Str(symbol, ast::StrStyle::Cooked)
105107
}
106108
token::StrRaw(n) => {
107-
// Ditto.
108-
let s = symbol.as_str();
109-
let symbol =
110-
if s.contains('\r') {
111-
let mut buf = String::with_capacity(s.len());
112-
let mut error = Ok(());
113-
unescape_literal(s, Mode::RawStr, &mut |_, unescaped_char| {
114-
match unescaped_char {
115-
Ok(c) => buf.push(c),
116-
Err(err) => {
117-
if err.is_fatal() {
118-
error = Err(LitError::LexerError);
119-
}
120-
}
109+
// Raw strings have no escapes, so we only need to check for invalid chars, and we
110+
// can reuse the symbol on success.
111+
let mut error = Ok(());
112+
unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| {
113+
match unescaped_char {
114+
Ok(_) => {}
115+
Err(err) => {
116+
if err.is_fatal() {
117+
error = Err(LitError::LexerError);
121118
}
122-
});
123-
error?;
124-
Symbol::intern(&buf)
125-
} else {
126-
symbol
127-
};
119+
}
120+
}
121+
});
122+
error?;
128123
LitKind::Str(symbol, ast::StrStyle::Raw(n))
129124
}
130125
token::ByteStr => {
@@ -143,25 +138,19 @@ impl LitKind {
143138
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
144139
}
145140
token::ByteStrRaw(n) => {
141+
// Raw strings have no escapes, so we only need to check for invalid chars, and we
142+
// can convert the symbol directly to a `Lrc<[u8]>` on success.
146143
let s = symbol.as_str();
147-
let bytes = if s.contains('\r') {
148-
let mut buf = Vec::with_capacity(s.len());
149-
let mut error = Ok(());
150-
unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
151-
Ok(c) => buf.push(byte_from_char(c)),
152-
Err(err) => {
153-
if err.is_fatal() {
154-
error = Err(LitError::LexerError);
155-
}
144+
let mut error = Ok(());
145+
unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
146+
Ok(_) => {}
147+
Err(err) => {
148+
if err.is_fatal() {
149+
error = Err(LitError::LexerError);
156150
}
157-
});
158-
error?;
159-
buf
160-
} else {
161-
symbol.to_string().into_bytes()
162-
};
163-
164-
LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
151+
}
152+
});
153+
LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))
165154
}
166155
token::CStr => {
167156
let s = symbol.as_str();
@@ -187,25 +176,23 @@ impl LitKind {
187176
LitKind::CStr(buf.into(), StrStyle::Cooked)
188177
}
189178
token::CStrRaw(n) => {
179+
// Raw strings have no escapes, so we only need to check for invalid chars, and we
180+
// can convert the symbol directly to a `Lrc<[u8]>` on success.
190181
let s = symbol.as_str();
191-
let mut buf = Vec::with_capacity(s.len());
192182
let mut error = Ok(());
193183
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
194184
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
195185
error = Err(LitError::NulInCStr(span));
196186
}
197-
Ok(CStrUnit::Byte(b)) => buf.push(b),
198-
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
199-
Ok(CStrUnit::Char(c)) => {
200-
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
201-
}
187+
Ok(_) => {}
202188
Err(err) => {
203189
if err.is_fatal() {
204190
error = Err(LitError::LexerError);
205191
}
206192
}
207193
});
208194
error?;
195+
let mut buf = s.to_owned().into_bytes();
209196
buf.push(0);
210197
LitKind::CStr(buf.into(), StrStyle::Raw(n))
211198
}

0 commit comments

Comments
 (0)