Skip to content

Commit 4b577e2

Browse files
committed
Support c string literals
1 parent 099b5b3 commit 4b577e2

File tree

21 files changed

+176
-34
lines changed

21 files changed

+176
-34
lines changed

crates/hir-def/src/body/pretty.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,7 @@ impl<'a> Printer<'a> {
611611
match literal {
612612
Literal::String(it) => w!(self, "{:?}", it),
613613
Literal::ByteString(it) => w!(self, "\"{}\"", it.escape_ascii()),
614+
Literal::CString(it) => w!(self, "\"{}\\0\"", it),
614615
Literal::Char(it) => w!(self, "'{}'", it.escape_debug()),
615616
Literal::Bool(it) => w!(self, "{}", it),
616617
Literal::Int(i, suffix) => {

crates/hir-def/src/hir.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ impl fmt::Display for FloatTypeWrapper {
8585
pub enum Literal {
8686
String(Box<str>),
8787
ByteString(Box<[u8]>),
88+
CString(Box<str>),
8889
Char(char),
8990
Bool(bool),
9091
Int(i128, Option<BuiltinInt>),
@@ -135,6 +136,10 @@ impl From<ast::LiteralKind> for Literal {
135136
let text = s.value().map(Box::from).unwrap_or_else(Default::default);
136137
Literal::String(text)
137138
}
139+
LiteralKind::CString(s) => {
140+
let text = s.value().map(Box::from).unwrap_or_else(Default::default);
141+
Literal::CString(text)
142+
}
138143
LiteralKind::Byte(b) => {
139144
Literal::Uint(b.value().unwrap_or_default() as u128, Some(BuiltinUint::U8))
140145
}

crates/hir-ty/src/infer/expr.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,7 @@ impl<'a> InferenceContext<'a> {
815815
Expr::Array(array) => self.infer_expr_array(array, expected),
816816
Expr::Literal(lit) => match lit {
817817
Literal::Bool(..) => self.result.standard_types.bool_.clone(),
818-
Literal::String(..) => {
818+
Literal::String(..) | Literal::CString(..) => {
819819
TyKind::Ref(Mutability::Not, static_lifetime(), TyKind::Str.intern(Interner))
820820
.intern(Interner)
821821
}

crates/hir-ty/src/infer/pat.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -428,9 +428,10 @@ fn is_non_ref_pat(body: &hir_def::body::Body, pat: PatId) -> bool {
428428
// FIXME: ConstBlock/Path/Lit might actually evaluate to ref, but inference is unimplemented.
429429
Pat::Path(..) => true,
430430
Pat::ConstBlock(..) => true,
431-
Pat::Lit(expr) => {
432-
!matches!(body[*expr], Expr::Literal(Literal::String(..) | Literal::ByteString(..)))
433-
}
431+
Pat::Lit(expr) => !matches!(
432+
body[*expr],
433+
Expr::Literal(Literal::String(..) | Literal::CString(..) | Literal::ByteString(..))
434+
),
434435
Pat::Wild | Pat::Bind { .. } | Pat::Ref { .. } | Pat::Box { .. } | Pat::Missing => false,
435436
}
436437
}

crates/hir-ty/src/mir/lower.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,15 +1112,24 @@ impl<'ctx> MirLowerCtx<'ctx> {
11121112
let bytes = match l {
11131113
hir_def::hir::Literal::String(b) => {
11141114
let b = b.as_bytes();
1115-
let mut data = vec![];
1115+
let mut data = Vec::with_capacity(mem::size_of::<usize>() * 2);
11161116
data.extend(0usize.to_le_bytes());
11171117
data.extend(b.len().to_le_bytes());
11181118
let mut mm = MemoryMap::default();
11191119
mm.insert(0, b.to_vec());
11201120
return Ok(Operand::from_concrete_const(data, mm, ty));
11211121
}
1122+
hir_def::hir::Literal::CString(b) => {
    // Build the nul-terminated buffer first: the length stored in the
    // constant must describe the bytes that actually back it, and a
    // `&CStr` counts its trailing nul byte as part of that length.
    let bytes = b.as_bytes().iter().copied().chain(iter::once(0)).collect::<Vec<_>>();

    // The constant is two little-endian `usize`s: the address (0, the key
    // used for the memory-map entry below) followed by the byte length.
    let mut data = Vec::with_capacity(mem::size_of::<usize>() * 2);
    data.extend(0usize.to_le_bytes());
    data.extend(bytes.len().to_le_bytes());
    let mut mm = MemoryMap::default();
    mm.insert(0, bytes);
    return Ok(Operand::from_concrete_const(data, mm, ty));
}
11221131
hir_def::hir::Literal::ByteString(b) => {
1123-
let mut data = vec![];
1132+
let mut data = Vec::with_capacity(mem::size_of::<usize>() * 2);
11241133
data.extend(0usize.to_le_bytes());
11251134
data.extend(b.len().to_le_bytes());
11261135
let mut mm = MemoryMap::default();

crates/ide-assists/src/handlers/raw_string.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use crate::{utils::required_hashes, AssistContext, AssistId, AssistKind, Assists
2020
// }
2121
// ```
2222
pub(crate) fn make_raw_string(acc: &mut Assists, ctx: &AssistContext<'_>) -> Option<()> {
23+
// FIXME: This should support byte and c strings as well.
2324
let token = ctx.find_token_at_offset::<ast::String>()?;
2425
if token.is_raw() {
2526
return None;

crates/ide/src/extend_selection.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ fn try_extend_selection(
3939
) -> Option<TextRange> {
4040
let range = frange.range;
4141

42-
let string_kinds = [COMMENT, STRING, BYTE_STRING];
42+
let string_kinds = [COMMENT, STRING, BYTE_STRING, C_STRING];
4343
let list_kinds = [
4444
RECORD_PAT_FIELD_LIST,
4545
MATCH_ARM_LIST,

crates/ide/src/syntax_highlighting.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@ mod tests;
1616
use hir::{Name, Semantics};
1717
use ide_db::{FxHashMap, RootDatabase, SymbolKind};
1818
use syntax::{
19-
ast, AstNode, AstToken, NodeOrToken, SyntaxKind::*, SyntaxNode, TextRange, WalkEvent, T,
19+
ast::{self, IsString},
20+
AstNode, AstToken, NodeOrToken,
21+
SyntaxKind::*,
22+
SyntaxNode, TextRange, WalkEvent, T,
2023
};
2124

2225
use crate::{
@@ -440,7 +443,17 @@ fn traverse(
440443
&& ast::ByteString::can_cast(descended_token.kind())
441444
{
442445
if let Some(byte_string) = ast::ByteString::cast(token) {
443-
highlight_escape_string(hl, &byte_string, range.start());
446+
if !byte_string.is_raw() {
447+
highlight_escape_string(hl, &byte_string, range.start());
448+
}
449+
}
450+
} else if ast::CString::can_cast(token.kind())
451+
&& ast::CString::can_cast(descended_token.kind())
452+
{
453+
if let Some(c_string) = ast::CString::cast(token) {
454+
if !c_string.is_raw() {
455+
highlight_escape_string(hl, &c_string, range.start());
456+
}
444457
}
445458
} else if ast::Char::can_cast(token.kind())
446459
&& ast::Char::can_cast(descended_token.kind())

crates/ide/src/syntax_highlighting/highlight.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ pub(super) fn token(sema: &Semantics<'_, RootDatabase>, token: SyntaxToken) -> O
2626
}
2727

2828
let highlight: Highlight = match token.kind() {
29-
STRING | BYTE_STRING => HlTag::StringLiteral.into(),
29+
STRING | BYTE_STRING | C_STRING => HlTag::StringLiteral.into(),
3030
INT_NUMBER if token.parent_ancestors().nth(1).map(|it| it.kind()) == Some(FIELD_EXPR) => {
3131
SymbolKind::Field.into()
3232
}

crates/ide/src/syntax_tree.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
use ide_db::base_db::{FileId, SourceDatabase};
2-
use ide_db::RootDatabase;
1+
use ide_db::{
2+
base_db::{FileId, SourceDatabase},
3+
RootDatabase,
4+
};
35
use syntax::{
46
AstNode, NodeOrToken, SourceFile, SyntaxKind::STRING, SyntaxToken, TextRange, TextSize,
57
};

crates/parser/src/grammar/expressions/atom.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ use super::*;
1212
// let _ = r"d";
1313
// let _ = b"e";
1414
// let _ = br"f";
15+
// let _ = c"g";
16+
// let _ = cr"h";
1517
// }
1618
pub(crate) const LITERAL_FIRST: TokenSet = TokenSet::new(&[
1719
T![true],
@@ -22,6 +24,7 @@ pub(crate) const LITERAL_FIRST: TokenSet = TokenSet::new(&[
2224
CHAR,
2325
STRING,
2426
BYTE_STRING,
27+
C_STRING,
2528
]);
2629

2730
pub(crate) fn literal(p: &mut Parser<'_>) -> Option<CompletedMarker> {

crates/parser/src/grammar/generic_args.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ const GENERIC_ARG_FIRST: TokenSet = TokenSet::new(&[
2828
BYTE,
2929
STRING,
3030
BYTE_STRING,
31+
C_STRING,
3132
])
3233
.union(types::TYPE_FIRST);
3334

crates/parser/src/lexed_str.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ impl<'a> Converter<'a> {
277277
if !terminated {
278278
err = "Missing trailing `\"` symbol to terminate the string literal";
279279
}
280-
STRING
280+
C_STRING
281281
}
282282
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
283283
if n_hashes.is_none() {
@@ -295,7 +295,7 @@ impl<'a> Converter<'a> {
295295
if n_hashes.is_none() {
296296
err = "Invalid raw string literal";
297297
}
298-
STRING
298+
C_STRING
299299
}
300300
};
301301

crates/parser/test_data/parser/inline/ok/0085_expr_literals.rast

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,30 @@ SOURCE_FILE
131131
LITERAL
132132
BYTE_STRING "br\"f\""
133133
SEMICOLON ";"
134+
WHITESPACE "\n "
135+
LET_STMT
136+
LET_KW "let"
137+
WHITESPACE " "
138+
WILDCARD_PAT
139+
UNDERSCORE "_"
140+
WHITESPACE " "
141+
EQ "="
142+
WHITESPACE " "
143+
LITERAL
144+
C_STRING "c\"g\""
145+
SEMICOLON ";"
146+
WHITESPACE "\n "
147+
LET_STMT
148+
LET_KW "let"
149+
WHITESPACE " "
150+
WILDCARD_PAT
151+
UNDERSCORE "_"
152+
WHITESPACE " "
153+
EQ "="
154+
WHITESPACE " "
155+
LITERAL
156+
C_STRING "cr\"h\""
157+
SEMICOLON ";"
134158
WHITESPACE "\n"
135159
R_CURLY "}"
136160
WHITESPACE "\n"

crates/parser/test_data/parser/inline/ok/0085_expr_literals.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@ fn foo() {
99
let _ = r"d";
1010
let _ = b"e";
1111
let _ = br"f";
12+
let _ = c"g";
13+
let _ = cr"h";
1214
}

crates/syntax/src/ast/expr_ext.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ impl ast::ArrayExpr {
288288
pub enum LiteralKind {
289289
String(ast::String),
290290
ByteString(ast::ByteString),
291+
CString(ast::CString),
291292
IntNumber(ast::IntNumber),
292293
FloatNumber(ast::FloatNumber),
293294
Char(ast::Char),
@@ -319,6 +320,9 @@ impl ast::Literal {
319320
if let Some(t) = ast::ByteString::cast(token.clone()) {
320321
return LiteralKind::ByteString(t);
321322
}
323+
if let Some(t) = ast::CString::cast(token.clone()) {
324+
return LiteralKind::CString(t);
325+
}
322326
if let Some(t) = ast::Char::cast(token.clone()) {
323327
return LiteralKind::Char(t);
324328
}

crates/syntax/src/ast/generated/tokens.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,27 @@ impl AstToken for ByteString {
9090
fn syntax(&self) -> &SyntaxToken { &self.syntax }
9191
}
9292

93+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
94+
pub struct CString {
95+
pub(crate) syntax: SyntaxToken,
96+
}
97+
impl std::fmt::Display for CString {
98+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99+
std::fmt::Display::fmt(&self.syntax, f)
100+
}
101+
}
102+
impl AstToken for CString {
103+
fn can_cast(kind: SyntaxKind) -> bool { kind == C_STRING }
104+
fn cast(syntax: SyntaxToken) -> Option<Self> {
105+
if Self::can_cast(syntax.kind()) {
106+
Some(Self { syntax })
107+
} else {
108+
None
109+
}
110+
}
111+
fn syntax(&self) -> &SyntaxToken { &self.syntax }
112+
}
113+
93114
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
94115
pub struct IntNumber {
95116
pub(crate) syntax: SyntaxToken,

crates/syntax/src/ast/token_ext.rs

Lines changed: 57 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,10 @@ impl QuoteOffsets {
145145
}
146146

147147
pub trait IsString: AstToken {
148+
const RAW_PREFIX: &'static str;
149+
fn is_raw(&self) -> bool {
150+
self.text().starts_with(Self::RAW_PREFIX)
151+
}
148152
fn quote_offsets(&self) -> Option<QuoteOffsets> {
149153
let text = self.text();
150154
let offsets = QuoteOffsets::new(text)?;
@@ -183,20 +187,18 @@ pub trait IsString: AstToken {
183187
cb(text_range + offset, unescaped_char);
184188
});
185189
}
186-
}
187-
188-
impl IsString for ast::String {}
189-
190-
impl ast::String {
191-
pub fn is_raw(&self) -> bool {
192-
self.text().starts_with('r')
193-
}
194-
pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
190+
fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
195191
let contents_range = self.text_range_between_quotes()?;
196192
assert!(TextRange::up_to(contents_range.len()).contains_range(range));
197193
Some(range + contents_range.start())
198194
}
195+
}
199196

197+
// Plain string tokens: the raw form is spelled with an `r` prefix (e.g. `r"d"`).
impl IsString for ast::String {
198+
// Prefix that `IsString::is_raw` looks for at the start of the token text.
const RAW_PREFIX: &'static str = "r";
199+
}
200+
201+
impl ast::String {
200202
pub fn value(&self) -> Option<Cow<'_, str>> {
201203
if self.is_raw() {
202204
let text = self.text();
@@ -235,13 +237,11 @@ impl ast::String {
235237
}
236238
}
237239

238-
impl IsString for ast::ByteString {}
240+
// Byte string tokens: the raw form is spelled with a `br` prefix (e.g. `br"f"`).
impl IsString for ast::ByteString {
241+
// Prefix that `IsString::is_raw` looks for at the start of the token text.
const RAW_PREFIX: &'static str = "br";
242+
}
239243

240244
impl ast::ByteString {
241-
pub fn is_raw(&self) -> bool {
242-
self.text().starts_with("br")
243-
}
244-
245245
pub fn value(&self) -> Option<Cow<'_, [u8]>> {
246246
if self.is_raw() {
247247
let text = self.text();
@@ -280,6 +280,49 @@ impl ast::ByteString {
280280
}
281281
}
282282

283+
// C string tokens: the raw form is spelled with a `cr` prefix (e.g. `cr"h"`).
impl IsString for ast::CString {
284+
// Prefix that `IsString::is_raw` looks for at the start of the token text.
const RAW_PREFIX: &'static str = "cr";
285+
}
286+
287+
impl ast::CString {
288+
pub fn value(&self) -> Option<Cow<'_, str>> {
289+
if self.is_raw() {
290+
let text = self.text();
291+
let text =
292+
&text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
293+
return Some(Cow::Borrowed(text));
294+
}
295+
296+
let text = self.text();
297+
let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
298+
299+
let mut buf = String::new();
300+
let mut prev_end = 0;
301+
let mut has_error = false;
302+
unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
303+
unescaped_char,
304+
buf.capacity() == 0,
305+
) {
306+
(Ok(c), false) => buf.push(c),
307+
(Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
308+
prev_end = char_range.end
309+
}
310+
(Ok(c), true) => {
311+
buf.reserve_exact(text.len());
312+
buf.push_str(&text[..prev_end]);
313+
buf.push(c);
314+
}
315+
(Err(_), _) => has_error = true,
316+
});
317+
318+
match (has_error, buf.capacity() == 0) {
319+
(true, _) => None,
320+
(false, true) => Some(Cow::Borrowed(text)),
321+
(false, false) => Some(Cow::Owned(buf)),
322+
}
323+
}
324+
}
325+
283326
impl ast::IntNumber {
284327
pub fn radix(&self) -> Radix {
285328
match self.text().get(..2).unwrap_or_default() {

crates/syntax/src/parsing/reparsing.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ fn reparse_token(
3939
let prev_token = root.covering_element(edit.delete).as_token()?.clone();
4040
let prev_token_kind = prev_token.kind();
4141
match prev_token_kind {
42-
WHITESPACE | COMMENT | IDENT | STRING => {
42+
WHITESPACE | COMMENT | IDENT | STRING | BYTE_STRING | C_STRING => {
4343
if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
4444
// removing a new line may extends previous token
4545
let deleted_range = edit.delete - prev_token.text_range().start();

0 commit comments

Comments
 (0)