Skip to content

Commit 6d63d90

Browse files
robinkunde authored and CodaFi committed
SR-331: Diagnostic notes and fixits for unicode confusables (swiftlang#9070)
1 parent 4b69240 commit 6d63d90

File tree

12 files changed

+9773
-6
lines changed

12 files changed

+9773
-6
lines changed

include/swift/AST/DiagnosticsParse.def

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,9 @@ ERROR(lex_single_quote_string,none,
107107
"single-quoted string literal found, use '\"'", ())
108108
ERROR(lex_invalid_curly_quote,none,
109109
"unicode curly quote found, replace with '\"'", ())
110-
110+
NOTE(lex_confusable_character,none,
111+
"unicode character '%0' looks similar to '%1'; did you mean to use '%1'?",
112+
(StringRef, StringRef))
111113

112114
ERROR(lex_unterminated_block_comment,none,
113115
"unterminated '/*' comment", ())

include/swift/AST/DiagnosticsSema.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,10 @@ ERROR(unspaced_unary_operator,none,
618618

619619
ERROR(use_unresolved_identifier,none,
620620
"use of unresolved %select{identifier|operator}1 %0", (DeclName, bool))
621+
NOTE(confusable_character,none,
622+
"%select{identifier|operator}0 '%1' contains possibly confused characters; "
623+
"did you mean to use '%2'?",
624+
(bool, StringRef, StringRef))
621625
ERROR(use_undeclared_type,none,
622626
"use of undeclared type %0", (Identifier))
623627
ERROR(use_undeclared_type_did_you_mean,none,

include/swift/Parse/Confusables.def

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
//===--- Confusables.def - Confusable unicode characters ------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// CONFUSABLE(CONFUSABLE_POINT, BASEPOINT)
14+
15+
CONFUSABLE(0x2010, 0x2d)
16+
CONFUSABLE(0x2011, 0x2d)
17+
CONFUSABLE(0x2012, 0x2d)
18+
CONFUSABLE(0x2013, 0x2d)
19+
CONFUSABLE(0xfe58, 0x2d)
20+
CONFUSABLE(0x6d4, 0x2d)
21+
CONFUSABLE(0x2043, 0x2d)
22+
CONFUSABLE(0x2d7, 0x2d)
23+
CONFUSABLE(0x2212, 0x2d)
24+
CONFUSABLE(0x2796, 0x2d)
25+
CONFUSABLE(0x2cba, 0x2d)
26+
CONFUSABLE(0x60d, 0x2c)
27+
CONFUSABLE(0x66b, 0x2c)
28+
CONFUSABLE(0x201a, 0x2c)
29+
CONFUSABLE(0xb8, 0x2c)
30+
CONFUSABLE(0xa4f9, 0x2c)
31+
CONFUSABLE(0x903, 0x3a)
32+
CONFUSABLE(0xa83, 0x3a)
33+
CONFUSABLE(0xff1a, 0x3a)
34+
CONFUSABLE(0x589, 0x3a)
35+
CONFUSABLE(0x703, 0x3a)
36+
CONFUSABLE(0x704, 0x3a)
37+
CONFUSABLE(0x16ec, 0x3a)
38+
CONFUSABLE(0xfe30, 0x3a)
39+
CONFUSABLE(0x1803, 0x3a)
40+
CONFUSABLE(0x1809, 0x3a)
41+
CONFUSABLE(0x205a, 0x3a)
42+
CONFUSABLE(0x5c3, 0x3a)
43+
CONFUSABLE(0x2f8, 0x3a)
44+
CONFUSABLE(0xa789, 0x3a)
45+
CONFUSABLE(0x2236, 0x3a)
46+
CONFUSABLE(0x2d0, 0x3a)
47+
CONFUSABLE(0xa4fd, 0x3a)
48+
CONFUSABLE(0xff01, 0x21)
49+
CONFUSABLE(0x1c3, 0x21)
50+
CONFUSABLE(0x2d51, 0x21)
51+
CONFUSABLE(0x294, 0x3f)
52+
CONFUSABLE(0x241, 0x3f)
53+
CONFUSABLE(0x97d, 0x3f)
54+
CONFUSABLE(0x13ae, 0x3f)
55+
CONFUSABLE(0xa6eb, 0x3f)
56+
CONFUSABLE(0x1d16d, 0x2e)
57+
CONFUSABLE(0x2024, 0x2e)
58+
CONFUSABLE(0x701, 0x2e)
59+
CONFUSABLE(0x702, 0x2e)
60+
CONFUSABLE(0xa60e, 0x2e)
61+
CONFUSABLE(0x10a50, 0x2e)
62+
CONFUSABLE(0x660, 0x2e)
63+
CONFUSABLE(0x6f0, 0x2e)
64+
CONFUSABLE(0xa4f8, 0x2e)
65+
CONFUSABLE(0xff3b, 0x28)
66+
CONFUSABLE(0x2768, 0x28)
67+
CONFUSABLE(0x2772, 0x28)
68+
CONFUSABLE(0x3014, 0x28)
69+
CONFUSABLE(0xfd3e, 0x28)
70+
CONFUSABLE(0xff3d, 0x29)
71+
CONFUSABLE(0x2769, 0x29)
72+
CONFUSABLE(0x2773, 0x29)
73+
CONFUSABLE(0x3015, 0x29)
74+
CONFUSABLE(0xfd3f, 0x29)
75+
CONFUSABLE(0x2774, 0x7b)
76+
CONFUSABLE(0x1d114, 0x7b)
77+
CONFUSABLE(0x2775, 0x7d)
78+
CONFUSABLE(0x204e, 0x2a)
79+
CONFUSABLE(0x66d, 0x2a)
80+
CONFUSABLE(0x2217, 0x2a)
81+
CONFUSABLE(0x1031f, 0x2a)
82+
CONFUSABLE(0x1735, 0x2f)
83+
CONFUSABLE(0x2041, 0x2f)
84+
CONFUSABLE(0x2215, 0x2f)
85+
CONFUSABLE(0x2044, 0x2f)
86+
CONFUSABLE(0x2571, 0x2f)
87+
CONFUSABLE(0x27cb, 0x2f)
88+
CONFUSABLE(0x29f8, 0x2f)
89+
CONFUSABLE(0x1d23a, 0x2f)
90+
CONFUSABLE(0x31d3, 0x2f)
91+
CONFUSABLE(0x3033, 0x2f)
92+
CONFUSABLE(0x2cc6, 0x2f)
93+
CONFUSABLE(0x30ce, 0x2f)
94+
CONFUSABLE(0x4e3f, 0x2f)
95+
CONFUSABLE(0x2f03, 0x2f)
96+
CONFUSABLE(0xff3c, 0x5c)
97+
CONFUSABLE(0xfe68, 0x5c)
98+
CONFUSABLE(0x2216, 0x5c)
99+
CONFUSABLE(0x27cd, 0x5c)
100+
CONFUSABLE(0x29f5, 0x5c)
101+
CONFUSABLE(0x29f9, 0x5c)
102+
CONFUSABLE(0x1d20f, 0x5c)
103+
CONFUSABLE(0x1d23b, 0x5c)
104+
CONFUSABLE(0x31d4, 0x5c)
105+
CONFUSABLE(0x4e36, 0x5c)
106+
CONFUSABLE(0x2f02, 0x5c)
107+
CONFUSABLE(0xa778, 0x26)
108+
CONFUSABLE(0x16ed, 0x2b)
109+
CONFUSABLE(0x2795, 0x2b)
110+
CONFUSABLE(0x1029b, 0x2b)
111+
CONFUSABLE(0x2039, 0x3c)
112+
CONFUSABLE(0x276e, 0x3c)
113+
CONFUSABLE(0x2c2, 0x3c)
114+
CONFUSABLE(0x1d236, 0x3c)
115+
CONFUSABLE(0x1438, 0x3c)
116+
CONFUSABLE(0x16b2, 0x3c)
117+
CONFUSABLE(0x1400, 0x3d)
118+
CONFUSABLE(0x2e40, 0x3d)
119+
CONFUSABLE(0x30a0, 0x3d)
120+
CONFUSABLE(0xa4ff, 0x3d)
121+
CONFUSABLE(0x203a, 0x3e)
122+
CONFUSABLE(0x276f, 0x3e)
123+
CONFUSABLE(0x2c3, 0x3e)
124+
CONFUSABLE(0x1d237, 0x3e)
125+
CONFUSABLE(0x1433, 0x3e)
126+
127+
#undef CONFUSABLE

include/swift/Parse/Confusables.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//===--- Confusables.h - Swift Confusable Character Diagnostics -*- C++ -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See http://swift.org/LICENSE.txt for license information
9+
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef SWIFT_CONFUSABLES_H
14+
#define SWIFT_CONFUSABLES_H
15+
16+
#include <stdint.h>
17+
18+
namespace swift {
19+
namespace confusable {
20+
/// Given a Unicode codepoint, determines whether it appears on the Unicode
21+
/// specification table of confusable characters and maps to punctuation,
22+
/// and returns either the expected ASCII character or 0.
23+
char tryConvertConfusableCharacterToASCII(uint32_t codepoint);
24+
}
25+
}
26+
27+
#endif

include/swift/Parse/Lexer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@
2626
#include "llvm/Support/SaveAndRestore.h"
2727

2828
namespace swift {
29+
/// Given a pointer to the starting byte of a UTF8 character, validate it and
30+
/// advance the lexer past it. This returns the encoded character or ~0U if
31+
/// the encoding is invalid.
32+
uint32_t validateUTF8CharacterAndAdvance(const char *&Ptr, const char *End);
33+
2934
class DiagnosticEngine;
3035
class InFlightDiagnostic;
3136
class LangOptions;

lib/Parse/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
add_swift_library(swiftParse STATIC
2+
Confusables.cpp
23
Lexer.cpp
34
ParseDecl.cpp
45
ParseExpr.cpp

lib/Parse/Confusables.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===--- Confusables.cpp - Swift Confusable Character Diagnostics ---------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See http://swift.org/LICENSE.txt for license information
9+
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "swift/Parse/Confusables.h"
14+
15+
char swift::confusable::tryConvertConfusableCharacterToASCII(uint32_t codepoint) {
16+
switch (codepoint) {
17+
#define CONFUSABLE(CONFUSABLE_POINT, BASEPOINT) \
18+
case CONFUSABLE_POINT: return BASEPOINT;
19+
#include "swift/Parse/Confusables.def"
20+
default: return 0;
21+
}
22+
}

lib/Parse/Lexer.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
//
1515
//===----------------------------------------------------------------------===//
1616

17+
#include "swift/Parse/Confusables.h"
1718
#include "swift/Parse/Lexer.h"
1819
#include "swift/AST/DiagnosticsParse.h"
1920
#include "swift/AST/Identifier.h"
@@ -49,7 +50,6 @@ using clang::isWhitespace;
4950
/// true if it is an erroneous code point.
5051
static bool EncodeToUTF8(unsigned CharValue,
5152
SmallVectorImpl<char> &Result) {
52-
assert(CharValue >= 0x80 && "Single-byte encoding should be already handled");
5353
// Number of bits in the value, ignoring leading zeros.
5454
unsigned NumBits = 32-llvm::countLeadingZeros(CharValue);
5555

@@ -105,7 +105,7 @@ static bool isStartOfUTF8Character(unsigned char C) {
105105
/// validateUTF8CharacterAndAdvance - Given a pointer to the starting byte of a
106106
/// UTF8 character, validate it and advance the lexer past it. This returns the
107107
/// encoded character or ~0U if the encoding is invalid.
108-
static uint32_t validateUTF8CharacterAndAdvance(const char *&Ptr,
108+
uint32_t swift::validateUTF8CharacterAndAdvance(const char *&Ptr,
109109
const char *End) {
110110
if (Ptr >= End)
111111
return ~0U;
@@ -1870,6 +1870,22 @@ void Lexer::lexImpl() {
18701870
} else {
18711871
diagnose(CurPtr-1, diag::lex_invalid_character)
18721872
.fixItReplaceChars(getSourceLoc(CurPtr-1), getSourceLoc(tmp), " ");
1873+
1874+
char expectedCodepoint;
1875+
if ((expectedCodepoint =
1876+
confusable::tryConvertConfusableCharacterToASCII(codepoint))) {
1877+
1878+
llvm::SmallString<4> confusedChar;
1879+
EncodeToUTF8(codepoint, confusedChar);
1880+
llvm::SmallString<1> expectedChar;
1881+
expectedChar += expectedCodepoint;
1882+
diagnose(CurPtr-1, diag::lex_confusable_character,
1883+
confusedChar, expectedChar)
1884+
.fixItReplaceChars(getSourceLoc(CurPtr-1),
1885+
getSourceLoc(tmp),
1886+
expectedChar);
1887+
}
1888+
18731889
CurPtr = tmp;
18741890
goto Restart; // Skip presumed whitespace.
18751891
}

lib/Sema/TypeCheckConstraints.cpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "swift/AST/PrettyStackTrace.h"
3030
#include "swift/AST/SubstitutionMap.h"
3131
#include "swift/AST/TypeCheckerDebugConsumer.h"
32+
#include "swift/Parse/Confusables.h"
3233
#include "swift/Parse/Lexer.h"
3334
#include "llvm/ADT/APInt.h"
3435
#include "llvm/ADT/DenseMap.h"
@@ -454,9 +455,39 @@ resolveDeclRefExpr(UnresolvedDeclRefExpr *UDRE, DeclContext *DC) {
454455
diagnose(Loc, diag::use_unresolved_identifier, Name, Name.isOperator())
455456
.highlight(UDRE->getSourceRange());
456457

457-
// Note all the correction candidates.
458-
for (auto &result : Lookup) {
459-
noteTypoCorrection(Name, nameLoc, result);
458+
const char *buffer = Name.getBaseName().get();
459+
llvm::SmallString<64> expectedIdentifier;
460+
bool isConfused = false;
461+
uint32_t codepoint;
462+
int offset = 0;
463+
while ((codepoint = validateUTF8CharacterAndAdvance(buffer,
464+
buffer +
465+
strlen(buffer)))
466+
!= ~0U) {
467+
int length = (buffer - Name.getBaseName().get()) - offset;
468+
char expectedCodepoint;
469+
if ((expectedCodepoint =
470+
confusable::tryConvertConfusableCharacterToASCII(codepoint))) {
471+
isConfused = true;
472+
expectedIdentifier += expectedCodepoint;
473+
} else {
474+
expectedIdentifier += (char)codepoint;
475+
}
476+
477+
offset += length;
478+
}
479+
480+
if (isConfused) {
481+
diagnose(Loc, diag::confusable_character,
482+
UDRE->getName().isOperator(),
483+
Name.getBaseName().str(), expectedIdentifier)
484+
.fixItReplaceChars(Loc, Loc.getAdvancedLoc(Name.getBaseName().getLength()),
485+
expectedIdentifier);
486+
} else {
487+
// Note all the correction candidates.
488+
for (auto &result : Lookup) {
489+
noteTypoCorrection(Name, nameLoc, result);
490+
}
460491
}
461492

462493
// TODO: consider recovering from here. We may want some way to suppress

test/Parse/confusables.swift

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// RUN: %target-typecheck-verify-swift
2+
3+
// expected-error @+4 {{type annotation missing in pattern}}
4+
// expected-error @+3 {{use of unresolved operator '⁚'}}
5+
// expected-error @+2 {{operator with postfix spacing cannot start a subexpression}}
6+
// expected-error @+1 {{consecutive statements on a line must be separated by ';'}}
7+
let number⁚ Int // expected-note {{operator '⁚' contains possibly confused characters; did you mean to use ':'?}} {{11-14=:}}
8+
9+
// expected-warning @+3 2 {{integer literal is unused}}
10+
// expected-error @+2 2 {{invalid character in source file}}
11+
// expected-error @+1 {{consecutive statements on a line must be separated by ';'}}
12+
5 ‒ 5 // expected-note 2 {{unicode character '‒' looks similar to '-'; did you mean to use '-'?}} {{3-6=-}}
13+
14+
// expected-error @+2 {{use of unresolved identifier 'ꝸꝸꝸ'}}
15+
// expected-error @+1 {{expected ',' separator}}
16+
if (true ꝸꝸꝸ false) {} // expected-note {{identifier 'ꝸꝸꝸ' contains possibly confused characters; did you mean to use '&&&'?}} {{10-19=&&&}}
17+
18+
// expected-error @+4 {{invalid character in source file}}
19+
// expected-error @+3 {{expected ',' separator}}
20+
// expected-error @+2 {{binary operator '==' cannot be applied to operands of type '(Int, Int)' and 'Int'}}
21+
// expected-note @+1 {{expected an argument list of type '(Int, Int)'}}
22+
if (5 ‒ 5) == 0 {} // expected-note {{unicode character '‒' looks similar to '-'; did you mean to use '-'?}} {{7-10=-}}

0 commit comments

Comments
 (0)