Skip to content

SR-331: Diagnostic notes and fixits for unicode confusables #9070

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 6, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion include/swift/AST/DiagnosticsParse.def
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ ERROR(lex_single_quote_string,none,
"single-quoted string literal found, use '\"'", ())
ERROR(lex_invalid_curly_quote,none,
"unicode curly quote found, replace with '\"'", ())

NOTE(lex_confusable_character,none,
"unicode character '%0' looks similar to '%1'; did you mean to use '%1'?",
(StringRef, StringRef))

ERROR(lex_unterminated_block_comment,none,
"unterminated '/*' comment", ())
Expand Down
4 changes: 4 additions & 0 deletions include/swift/AST/DiagnosticsSema.def
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,10 @@ ERROR(unspaced_unary_operator,none,

ERROR(use_unresolved_identifier,none,
"use of unresolved %select{identifier|operator}1 %0", (DeclName, bool))
NOTE(confusable_character,none,
"%select{identifier|operator}0 '%1' contains possibly confused characters; "
"did you mean to use '%2'?",
(bool, StringRef, StringRef))
ERROR(use_undeclared_type,none,
"use of undeclared type %0", (Identifier))
ERROR(use_undeclared_type_did_you_mean,none,
Expand Down
127 changes: 127 additions & 0 deletions include/swift/Parse/Confusables.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
//===--- Confusables.def - Confusable unicode characters ------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

// CONFUSABLE(CONFUSABLE_POINT, BASEPOINT)
//
// Each entry maps CONFUSABLE_POINT, a Unicode code point, to BASEPOINT, the
// ASCII punctuation character it looks similar to. The including file must
// define CONFUSABLE before including this file; the macro is #undef'd at the
// end of this file.

// Look-alikes of '-' (0x2d, hyphen-minus).
CONFUSABLE(0x2010, 0x2d)
CONFUSABLE(0x2011, 0x2d)
CONFUSABLE(0x2012, 0x2d)
CONFUSABLE(0x2013, 0x2d)
CONFUSABLE(0xfe58, 0x2d)
CONFUSABLE(0x6d4, 0x2d)
CONFUSABLE(0x2043, 0x2d)
CONFUSABLE(0x2d7, 0x2d)
CONFUSABLE(0x2212, 0x2d)
CONFUSABLE(0x2796, 0x2d)
CONFUSABLE(0x2cba, 0x2d)
// Look-alikes of ',' (0x2c, comma).
CONFUSABLE(0x60d, 0x2c)
CONFUSABLE(0x66b, 0x2c)
CONFUSABLE(0x201a, 0x2c)
CONFUSABLE(0xb8, 0x2c)
CONFUSABLE(0xa4f9, 0x2c)
// Look-alikes of ':' (0x3a, colon).
CONFUSABLE(0x903, 0x3a)
CONFUSABLE(0xa83, 0x3a)
CONFUSABLE(0xff1a, 0x3a)
CONFUSABLE(0x589, 0x3a)
CONFUSABLE(0x703, 0x3a)
CONFUSABLE(0x704, 0x3a)
CONFUSABLE(0x16ec, 0x3a)
CONFUSABLE(0xfe30, 0x3a)
CONFUSABLE(0x1803, 0x3a)
CONFUSABLE(0x1809, 0x3a)
CONFUSABLE(0x205a, 0x3a)
CONFUSABLE(0x5c3, 0x3a)
CONFUSABLE(0x2f8, 0x3a)
CONFUSABLE(0xa789, 0x3a)
CONFUSABLE(0x2236, 0x3a)
CONFUSABLE(0x2d0, 0x3a)
CONFUSABLE(0xa4fd, 0x3a)
// Look-alikes of '!' (0x21, exclamation mark).
CONFUSABLE(0xff01, 0x21)
CONFUSABLE(0x1c3, 0x21)
CONFUSABLE(0x2d51, 0x21)
// Look-alikes of '?' (0x3f, question mark).
CONFUSABLE(0x294, 0x3f)
CONFUSABLE(0x241, 0x3f)
CONFUSABLE(0x97d, 0x3f)
CONFUSABLE(0x13ae, 0x3f)
CONFUSABLE(0xa6eb, 0x3f)
// Look-alikes of '.' (0x2e, full stop).
CONFUSABLE(0x1d16d, 0x2e)
CONFUSABLE(0x2024, 0x2e)
CONFUSABLE(0x701, 0x2e)
CONFUSABLE(0x702, 0x2e)
CONFUSABLE(0xa60e, 0x2e)
CONFUSABLE(0x10a50, 0x2e)
CONFUSABLE(0x660, 0x2e)
CONFUSABLE(0x6f0, 0x2e)
CONFUSABLE(0xa4f8, 0x2e)
// Look-alikes of '(' (0x28, left parenthesis).
CONFUSABLE(0xff3b, 0x28)
CONFUSABLE(0x2768, 0x28)
CONFUSABLE(0x2772, 0x28)
CONFUSABLE(0x3014, 0x28)
CONFUSABLE(0xfd3e, 0x28)
// Look-alikes of ')' (0x29, right parenthesis).
CONFUSABLE(0xff3d, 0x29)
CONFUSABLE(0x2769, 0x29)
CONFUSABLE(0x2773, 0x29)
CONFUSABLE(0x3015, 0x29)
CONFUSABLE(0xfd3f, 0x29)
// Look-alikes of '{' (0x7b, left curly bracket).
CONFUSABLE(0x2774, 0x7b)
CONFUSABLE(0x1d114, 0x7b)
// Look-alikes of '}' (0x7d, right curly bracket).
CONFUSABLE(0x2775, 0x7d)
// Look-alikes of '*' (0x2a, asterisk).
CONFUSABLE(0x204e, 0x2a)
CONFUSABLE(0x66d, 0x2a)
CONFUSABLE(0x2217, 0x2a)
CONFUSABLE(0x1031f, 0x2a)
// Look-alikes of '/' (0x2f, solidus).
CONFUSABLE(0x1735, 0x2f)
CONFUSABLE(0x2041, 0x2f)
CONFUSABLE(0x2215, 0x2f)
CONFUSABLE(0x2044, 0x2f)
CONFUSABLE(0x2571, 0x2f)
CONFUSABLE(0x27cb, 0x2f)
CONFUSABLE(0x29f8, 0x2f)
CONFUSABLE(0x1d23a, 0x2f)
CONFUSABLE(0x31d3, 0x2f)
CONFUSABLE(0x3033, 0x2f)
CONFUSABLE(0x2cc6, 0x2f)
CONFUSABLE(0x30ce, 0x2f)
CONFUSABLE(0x4e3f, 0x2f)
CONFUSABLE(0x2f03, 0x2f)
// Look-alikes of the backslash (0x5c, reverse solidus).
CONFUSABLE(0xff3c, 0x5c)
CONFUSABLE(0xfe68, 0x5c)
CONFUSABLE(0x2216, 0x5c)
CONFUSABLE(0x27cd, 0x5c)
CONFUSABLE(0x29f5, 0x5c)
CONFUSABLE(0x29f9, 0x5c)
CONFUSABLE(0x1d20f, 0x5c)
CONFUSABLE(0x1d23b, 0x5c)
CONFUSABLE(0x31d4, 0x5c)
CONFUSABLE(0x4e36, 0x5c)
CONFUSABLE(0x2f02, 0x5c)
// Look-alikes of '&' (0x26, ampersand).
CONFUSABLE(0xa778, 0x26)
// Look-alikes of '+' (0x2b, plus sign).
CONFUSABLE(0x16ed, 0x2b)
CONFUSABLE(0x2795, 0x2b)
CONFUSABLE(0x1029b, 0x2b)
// Look-alikes of '<' (0x3c, less-than sign).
CONFUSABLE(0x2039, 0x3c)
CONFUSABLE(0x276e, 0x3c)
CONFUSABLE(0x2c2, 0x3c)
CONFUSABLE(0x1d236, 0x3c)
CONFUSABLE(0x1438, 0x3c)
CONFUSABLE(0x16b2, 0x3c)
// Look-alikes of '=' (0x3d, equals sign).
CONFUSABLE(0x1400, 0x3d)
CONFUSABLE(0x2e40, 0x3d)
CONFUSABLE(0x30a0, 0x3d)
CONFUSABLE(0xa4ff, 0x3d)
// Look-alikes of '>' (0x3e, greater-than sign).
CONFUSABLE(0x203a, 0x3e)
CONFUSABLE(0x276f, 0x3e)
CONFUSABLE(0x2c3, 0x3e)
CONFUSABLE(0x1d237, 0x3e)
CONFUSABLE(0x1433, 0x3e)

// Remove the includer's definition so the table can be included again with a
// different expansion.
#undef CONFUSABLE
27 changes: 27 additions & 0 deletions include/swift/Parse/Confusables.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//===--- Confusables.h - Swift Confusable Character Diagnostics -*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#ifndef SWIFT_CONFUSABLES_H
#define SWIFT_CONFUSABLES_H

#include <stdint.h>

namespace swift {
namespace confusable {
/// Given a Unicode code point (already decoded, not its UTF-8 bytes),
/// determines whether it appears on the Unicode specification table of
/// confusable characters and maps to ASCII punctuation.
///
/// \returns the expected ASCII character, or 0 if the code point has no such
/// mapping.
char tryConvertConfusableCharacterToASCII(uint32_t codepoint);
} // end namespace confusable
} // end namespace swift

#endif
5 changes: 5 additions & 0 deletions include/swift/Parse/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
#include "llvm/Support/SaveAndRestore.h"

namespace swift {
/// Given a pointer to the starting byte of a UTF8 character, validate it and
/// advance the lexer past it. This returns the encoded character or ~0U if
/// the encoding is invalid.
uint32_t validateUTF8CharacterAndAdvance(const char *&Ptr, const char *End);

class DiagnosticEngine;
class InFlightDiagnostic;
class LangOptions;
Expand Down
1 change: 1 addition & 0 deletions lib/Parse/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
add_swift_library(swiftParse STATIC
Confusables.cpp
Lexer.cpp
ParseDecl.cpp
ParseExpr.cpp
Expand Down
22 changes: 22 additions & 0 deletions lib/Parse/Confusables.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===--- Confusables.cpp - Swift Confusable Character Diagnostics ---------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#include "swift/Parse/Confusables.h"

char swift::confusable::tryConvertConfusableCharacterToASCII(uint32_t codepoint) {
  // Every CONFUSABLE(point, base) entry in Confusables.def expands to one
  // `case` label that returns the ASCII character the code point resembles.
  switch (codepoint) {
#define CONFUSABLE(CONFUSABLE_POINT, BASEPOINT)                                \
  case CONFUSABLE_POINT:                                                       \
    return static_cast<char>(BASEPOINT);
#include "swift/Parse/Confusables.def"
  default:
    break;
  }

  // Not in the table: 0 tells the caller there is no ASCII look-alike.
  return 0;
}
20 changes: 18 additions & 2 deletions lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
//
//===----------------------------------------------------------------------===//

#include "swift/Parse/Confusables.h"
#include "swift/Parse/Lexer.h"
#include "swift/AST/DiagnosticsParse.h"
#include "swift/AST/Identifier.h"
Expand Down Expand Up @@ -49,7 +50,6 @@ using clang::isWhitespace;
/// true if it is an erroneous code point.
static bool EncodeToUTF8(unsigned CharValue,
SmallVectorImpl<char> &Result) {
assert(CharValue >= 0x80 && "Single-byte encoding should be already handled");
// Number of bits in the value, ignoring leading zeros.
unsigned NumBits = 32-llvm::countLeadingZeros(CharValue);

Expand Down Expand Up @@ -105,7 +105,7 @@ static bool isStartOfUTF8Character(unsigned char C) {
/// validateUTF8CharacterAndAdvance - Given a pointer to the starting byte of a
/// UTF8 character, validate it and advance the lexer past it. This returns the
/// encoded character or ~0U if the encoding is invalid.
static uint32_t validateUTF8CharacterAndAdvance(const char *&Ptr,
uint32_t swift::validateUTF8CharacterAndAdvance(const char *&Ptr,
const char *End) {
if (Ptr >= End)
return ~0U;
Expand Down Expand Up @@ -1863,6 +1863,22 @@ void Lexer::lexImpl() {
} else {
diagnose(CurPtr-1, diag::lex_invalid_character)
.fixItReplaceChars(getSourceLoc(CurPtr-1), getSourceLoc(tmp), " ");

char expectedCodepoint;
if ((expectedCodepoint =
confusable::tryConvertConfusableCharacterToASCII(codepoint))) {

llvm::SmallString<4> confusedChar;
EncodeToUTF8(codepoint, confusedChar);
llvm::SmallString<1> expectedChar;
expectedChar += expectedCodepoint;
diagnose(CurPtr-1, diag::lex_confusable_character,
confusedChar, expectedChar)
.fixItReplaceChars(getSourceLoc(CurPtr-1),
getSourceLoc(tmp),
expectedChar);
}

CurPtr = tmp;
goto Restart; // Skip presumed whitespace.
}
Expand Down
37 changes: 34 additions & 3 deletions lib/Sema/TypeCheckConstraints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "swift/AST/PrettyStackTrace.h"
#include "swift/AST/SubstitutionMap.h"
#include "swift/AST/TypeCheckerDebugConsumer.h"
#include "swift/Parse/Confusables.h"
#include "swift/Parse/Lexer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
Expand Down Expand Up @@ -454,9 +455,39 @@ resolveDeclRefExpr(UnresolvedDeclRefExpr *UDRE, DeclContext *DC) {
diagnose(Loc, diag::use_unresolved_identifier, Name, Name.isOperator())
.highlight(UDRE->getSourceRange());

// Note all the correction candidates.
for (auto &result : Lookup) {
noteTypoCorrection(Name, nameLoc, result);
// Check whether the unresolved name contains characters that are easily
// confused with ASCII punctuation. While scanning, build the name the user
// most likely meant by swapping each confusable character for its ASCII
// look-alike and keeping every other character as written.
const char *buffer = Name.getBaseName().get();
const char *const bufferEnd = buffer + strlen(buffer);
llvm::SmallString<64> expectedIdentifier;
bool isConfused = false;
while (buffer < bufferEnd) {
  const char *charStart = buffer;
  uint32_t codepoint = validateUTF8CharacterAndAdvance(buffer, bufferEnd);
  // ~0U signals an invalid encoding; stop scanning at that point.
  if (codepoint == ~0U)
    break;

  if (char expectedCodepoint =
          confusable::tryConvertConfusableCharacterToASCII(codepoint)) {
    isConfused = true;
    expectedIdentifier += expectedCodepoint;
  } else {
    // Copy the character's original UTF-8 bytes. Casting the code point to
    // `char` would truncate any non-ASCII character to a single bogus byte
    // and corrupt the suggested replacement.
    expectedIdentifier.append(charStart, buffer);
  }
}

if (isConfused) {
  // Suggest the de-confused spelling and offer it as a fix-it over the
  // whole base name.
  diagnose(Loc, diag::confusable_character,
           UDRE->getName().isOperator(),
           Name.getBaseName().str(), expectedIdentifier)
    .fixItReplaceChars(Loc, Loc.getAdvancedLoc(Name.getBaseName().getLength()),
                       expectedIdentifier);
} else {
  // Note all the correction candidates.
  for (auto &result : Lookup) {
    noteTypoCorrection(Name, nameLoc, result);
  }
}

// TODO: consider recovering from here. We may want some way to suppress
Expand Down
22 changes: 22 additions & 0 deletions test/Parse/confusables.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// RUN: %target-typecheck-verify-swift

// Checks the notes and fix-its produced for Unicode characters that look like
// ASCII punctuation.

// A colon look-alike after a pattern name is treated as an operator; the note
// suggests ':'.
// expected-error @+4 {{type annotation missing in pattern}}
// expected-error @+3 {{use of unresolved operator '⁚'}}
// expected-error @+2 {{operator with postfix spacing cannot start a subexpression}}
// expected-error @+1 {{consecutive statements on a line must be separated by ';'}}
let number⁚ Int // expected-note {{operator '⁚' contains possibly confused characters; did you mean to use ':'?}} {{11-14=:}}

// A dash look-alike between two literals is an invalid source character; the
// note suggests '-'.
// expected-warning @+3 2 {{integer literal is unused}}
// expected-error @+2 2 {{invalid character in source file}}
// expected-error @+1 {{consecutive statements on a line must be separated by ';'}}
5 ‒ 5 // expected-note 2 {{unicode character '‒' looks similar to '-'; did you mean to use '-'?}} {{3-6=-}}

// An identifier written entirely with ampersand look-alikes; the note suggests
// '&&&'.
// expected-error @+2 {{use of unresolved identifier 'ꝸꝸꝸ'}}
// expected-error @+1 {{expected ',' separator}}
if (true ꝸꝸꝸ false) {} // expected-note {{identifier 'ꝸꝸꝸ' contains possibly confused characters; did you mean to use '&&&'?}} {{10-19=&&&}}

// A dash look-alike inside a parenthesized condition; the note suggests '-'.
// expected-error @+4 {{invalid character in source file}}
// expected-error @+3 {{expected ',' separator}}
// expected-error @+2 {{binary operator '==' cannot be applied to operands of type '(Int, Int)' and 'Int'}}
// expected-note @+1 {{expected an argument list of type '(Int, Int)'}}
if (5 ‒ 5) == 0 {} // expected-note {{unicode character '‒' looks similar to '-'; did you mean to use '-'?}} {{7-10=-}}
Loading