Skip to content

Commit 06c528d

Browse files
authored
Merge pull request #16090 from hashemi/non-breaking-spaces
[Parser] Detect non-breaking space (U+00A0) and offer a fix-it
2 parents 9ba05ef + 3c841b8 commit 06c528d

File tree

4 files changed

+70
-0
lines changed

4 files changed

+70
-0
lines changed

include/swift/AST/DiagnosticsParse.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ ERROR(lex_invalid_curly_quote,none,
121121
NOTE(lex_confusable_character,none,
122122
"unicode character '%0' looks similar to '%1'; did you mean to use '%1'?",
123123
(StringRef, StringRef))
124+
WARNING(lex_nonbreaking_space,none,
125+
"non-breaking space (U+00A0) used instead of regular space", ())
124126

125127
ERROR(lex_unterminated_block_comment,none,
126128
"unterminated '/*' comment", ())

lib/Parse/Lexer.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,12 @@ static bool isLeftBound(const char *tokBegin, const char *bufferBegin) {
682682
else
683683
return true;
684684

685+
case '\xA0':
686+
if (tokBegin - 1 != bufferBegin && tokBegin[-2] == '\xC2')
687+
return false; // Non-breaking whitespace (U+00A0)
688+
else
689+
return true;
690+
685691
default:
686692
return true;
687693
}
@@ -716,6 +722,12 @@ static bool isRightBound(const char *tokEnd, bool isLeftBound,
716722
else
717723
return true;
718724

725+
case '\xC2':
726+
if (tokEnd[1] == '\xA0')
727+
return false; // Non-breaking whitespace (U+00A0)
728+
else
729+
return true;
730+
719731
default:
720732
return true;
721733
}
@@ -1894,6 +1906,12 @@ bool Lexer::lexUnknown(bool EmitDiagnosticsIfToken) {
18941906
.fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(Tmp), " ");
18951907
CurPtr = Tmp;
18961908
return false; // Skip presumed whitespace.
1909+
} else if (Codepoint == 0x000000A0) {
1910+
// Non-breaking whitespace (U+00A0)
1911+
diagnose(CurPtr - 1, diag::lex_nonbreaking_space)
1912+
.fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(Tmp), " ");
1913+
CurPtr = Tmp;
1914+
return false;
18971915
} else if (Codepoint == 0x0000201D) {
18981916
// If this is an end curly quote, just diagnose it with a fixit hint.
18991917
if (EmitDiagnosticsIfToken) {

test/Parse/nonbreaking_space.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// RUN: %target-typecheck-verify-swift
2+
3+
let nonBreakingSpace1 = 3 // expected-warning {{non-breaking space (U+00A0) used instead of regular space}} {{22-24= }}
4+
5+
let nonBreakingSpace2 = 3 // expected-warning {{non-breaking space (U+00A0) used instead of regular space}} {{24-26= }}
6+
test/Syntax/tokens_nonbreaking_space.swift

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// RUN: cat %s | sed -e 's/'$(echo -ne "\x5a")'/'$(echo -ne "\xc2\xa0")'/g' > %t.tmp
2+
// RUN: cp -f %t.tmp %t
3+
// RUN: %swift-syntax-test -input-source-filename %t -dump-full-tokens 2>&1 | %FileCheck %t
4+
let a =Z3Z // nbsp(Z)
5+
let bZ= 3Z
6+
let cZ=Z3
7+
8+
// CHECK: 4:8: warning: non-breaking space (U+00A0) used instead of regular space
9+
// CHECK: 4:11: warning: non-breaking space (U+00A0) used instead of regular space
10+
// CHECK: 5:6: warning: non-breaking space (U+00A0) used instead of regular space
11+
// CHECK: 5:11: warning: non-breaking space (U+00A0) used instead of regular space
12+
// CHECK: 6:6: warning: non-breaking space (U+00A0) used instead of regular space
13+
// CHECK: 6:9: warning: non-breaking space (U+00A0) used instead of regular space
14+
15+
// CHECK-LABEL: 4:7
16+
// CHECK-NEXT:(Token equal
17+
// CHECK-NEXT: (text="=")
18+
// CHECK-NEXT: (trivia garbageText \302\240))
19+
20+
// CHECK-LABEL: 4:10
21+
// CHECK-NEXT:(Token integer_literal
22+
// CHECK-NEXT: (text="3")
23+
// CHECK-NEXT: (trivia garbageText \302\240)
24+
// CHECK-NEXT: (trivia space 1))
25+
26+
// CHECK-LABEL: 5:5
27+
// CHECK-NEXT:(Token identifier
28+
// CHECK-NEXT: (text="b")
29+
// CHECK-NEXT: (trivia garbageText \302\240))
30+
31+
// CHECK-LABEL: 5:10
32+
// CHECK-NEXT:(Token integer_literal
33+
// CHECK-NEXT: (text="3")
34+
// CHECK-NEXT: (trivia garbageText \302\240)
35+
36+
// CHECK-LABEL: 6:5
37+
// CHECK-NEXT:(Token identifier
38+
// CHECK-NEXT: (text="c")
39+
// CHECK-NEXT: (trivia garbageText \302\240))
40+
41+
// CHECK-LABEL: 6:8
42+
// CHECK-NEXT:(Token equal
43+
// CHECK-NEXT: (text="=")
44+
// CHECK-NEXT: (trivia garbageText \302\240))

0 commit comments

Comments
 (0)