Skip to content

Commit e28c2e2

Browse files
committed
Fix <rdar://14296004> [QoI] Poor diagnostic/recovery when two operators (e.g., == and -) are adjacted without spaces.
This is a frequently reported and surprising issue where lack of whitespace leads to rejecting common code like "X*-4". Fix this by diagnosing it specifically as a lack of whitespace problem, including a fixit to insert the missing whitespace (to transform it into "X * -4". This even handles the cases where there are multiple valid (single) splits possible by emitting a series of notes.
1 parent d221401 commit e28c2e2

File tree

5 files changed

+189
-16
lines changed

5 files changed

+189
-16
lines changed

include/swift/AST/DiagnosticsSema.def

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -420,16 +420,28 @@ ERROR(ambiguous_module_type,sema_nb,none,
420420
ERROR(use_nonmatching_operator,sema_nb,none,
421421
"%0 is not a %select{binary|prefix unary|postfix unary}1 operator",
422422
(Identifier, unsigned))
423+
424+
ERROR(unspaced_operators_fixit,sema_nb,none,
425+
"missing whitespace between %0 and %1 operators",
426+
(Identifier, Identifier, bool))
427+
ERROR(unspaced_operators,sema_nb,none,
428+
"ambiguous missing whitespace between unary and binary operators", ())
429+
NOTE(unspaced_operators_candidate,sema_nb,none,
430+
"could be %select{binary|postfix}2 %0 and %select{prefix|binary}2 %1",
431+
(Identifier, Identifier, bool))
432+
433+
434+
423435
ERROR(use_unresolved_identifier,sema_nb,none,
424436
"use of unresolved %select{identifier|operator}1 %0", (Identifier, bool))
425437
ERROR(use_undeclared_type,sema_nb,none,
426438
"use of undeclared type %0", (Identifier))
427439
ERROR(use_undeclared_type_did_you_mean,sema_nb,none,
428-
"use of undeclared type %0; did you mean to use '%1'?", (Identifier, StringRef))
440+
"use of undeclared type %0; did you mean to use '%1'?", (Identifier, StringRef))
429441
NOTE(note_remapped_type,sema_nb,none,
430-
"did you mean to use '%0'?", (StringRef))
442+
"did you mean to use '%0'?", (StringRef))
431443
ERROR(identifier_init_failure,sema_nb,none,
432-
"could not infer type for %0", (Identifier))
444+
"could not infer type for %0", (Identifier))
433445
ERROR(pattern_used_in_type,sema_nb,none,
434446
"%0 used within its own type", (Identifier))
435447
NOTE(note_module_as_type,sema_nb,none,

include/swift/Parse/Lexer.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,15 @@ class Lexer {
240240
restoreState(S);
241241
}
242242

243+
/// \brief Retrieve the Token referred to by \c Loc.
244+
///
245+
/// \param SM The source manager in which the given source location
246+
/// resides.
247+
///
248+
/// \param Loc The source location of the beginning of a token.
249+
static Token getTokenAtLocation(const SourceManager &SM, SourceLoc Loc);
250+
251+
243252
/// \brief Retrieve the source location that points just past the
244253
/// end of the token referred to by \c Loc.
245254
///
@@ -299,6 +308,10 @@ class Lexer {
299308
/// reserved word.
300309
static tok kindOfIdentifier(StringRef Str, bool InSILMode);
301310

311+
/// \brief Determines if the given string is a valid operator identifier,
312+
/// without escaping characters.
313+
static bool isOperator(StringRef string);
314+
302315
SourceLoc getLocForStartOfBuffer() const {
303316
return SourceLoc(llvm::SMLoc::getFromPointer(BufferStart));
304317
}

lib/Parse/Lexer.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,18 @@ bool Lexer::isIdentifier(StringRef string) {
523523
return p == end;
524524
}
525525

526+
/// \brief Determines if the given string is a valid operator identifier,
527+
/// without escaping characters.
528+
bool Lexer::isOperator(StringRef string) {
529+
if (string.empty()) return false;
530+
char const *p = string.data(), *end = string.end();
531+
if (!advanceIfValidStartOfOperator(p, end))
532+
return false;
533+
while (p < end && advanceIfValidContinuationOfOperator(p, end));
534+
return p == end;
535+
}
536+
537+
526538
tok Lexer::kindOfIdentifier(StringRef Str, bool InSILMode) {
527539
tok Kind = llvm::StringSwitch<tok>(Str)
528540
#define KEYWORD(kw) \
@@ -1664,15 +1676,15 @@ void Lexer::lexImpl() {
16641676
}
16651677
}
16661678

1667-
SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
1679+
Token Lexer::getTokenAtLocation(const SourceManager &SM, SourceLoc Loc) {
16681680
// Don't try to do anything with an invalid location.
16691681
if (!Loc.isValid())
1670-
return Loc;
1682+
return Token();
16711683

16721684
// Figure out which buffer contains this location.
16731685
int BufferID = SM.findBufferContainingLoc(Loc);
16741686
if (BufferID < 0)
1675-
return SourceLoc();
1687+
return Token();
16761688

16771689
// Use fake language options; language options only affect validity
16781690
// and the exact token produced.
@@ -1685,10 +1697,14 @@ SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
16851697
Lexer L(FakeLangOpts, SM, BufferID, nullptr, /*InSILMode=*/ false,
16861698
CommentRetentionMode::ReturnAsTokens);
16871699
L.restoreState(State(Loc));
1688-
unsigned Length = L.peekNextToken().getLength();
1689-
return Loc.getAdvancedLoc(Length);
1700+
return L.peekNextToken();
16901701
}
16911702

1703+
SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
1704+
return Loc.getAdvancedLoc(getTokenAtLocation(SM, Loc).getLength());
1705+
}
1706+
1707+
16921708
static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
16931709
unsigned BufferID,
16941710
unsigned Offset,

lib/Sema/TypeCheckConstraints.cpp

Lines changed: 134 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,132 @@ static bool matchesDeclRefKind(ValueDecl *value, DeclRefKind refKind) {
281281
llvm_unreachable("bad declaration reference kind");
282282
}
283283

284+
static bool containsDeclRefKind(LookupResult &lookupResult,
285+
DeclRefKind refKind) {
286+
for (auto candidate : lookupResult) {
287+
ValueDecl *D = candidate.Decl;
288+
if (!D || !D->hasType())
289+
continue;
290+
if (matchesDeclRefKind(D, refKind))
291+
return true;
292+
}
293+
return false;
294+
}
295+
296+
/// Emit a diagnostic with a fixit hint for an invalid binary operator, showing
297+
/// how to split it according to splitCandidate.
298+
static void diagnoseOperatorSplit(UnresolvedDeclRefExpr *UDRE,
299+
std::pair<unsigned, bool> splitCandidate,
300+
Diag<Identifier, Identifier, bool> diagID,
301+
TypeChecker &TC) {
302+
303+
unsigned splitLoc = splitCandidate.first;
304+
bool isBinOpFirst = splitCandidate.second;
305+
StringRef nameStr = UDRE->getName().str();
306+
auto startStr = nameStr.substr(0, splitLoc);
307+
auto endStr = nameStr.drop_front(splitLoc);
308+
309+
// One valid split found, it is almost certainly the right answer.
310+
auto diag = TC.diagnose(UDRE->getLoc(), diagID,
311+
TC.Context.getIdentifier(startStr),
312+
TC.Context.getIdentifier(endStr), isBinOpFirst);
313+
// Highlight the whole operator.
314+
diag.highlight(UDRE->getLoc());
315+
// Insert whitespace on the left if the binop is at the start, or to the
316+
// right if it is end.
317+
if (isBinOpFirst)
318+
diag.fixItInsert(UDRE->getLoc(), " ");
319+
else
320+
diag.fixItInsertAfter(UDRE->getLoc(), " ");
321+
322+
// Insert a space between the operators.
323+
diag.fixItInsert(UDRE->getLoc().getAdvancedLoc(splitLoc), " ");
324+
}
325+
326+
/// If we failed lookup of a binary operator, check to see it to see if
327+
/// it is a binary operator juxtaposed with a unary operator (x*-4) that
328+
/// needs whitespace. If so, emit specific diagnostics for it and return true,
329+
/// otherwise return false.
330+
static bool diagnoseJuxtaposedBinOp(UnresolvedDeclRefExpr *UDRE,
331+
DeclContext *DC,
332+
TypeChecker &TC) {
333+
Identifier name = UDRE->getName();
334+
StringRef nameStr = name.str();
335+
if (!name.isOperator() || nameStr.size() < 2 ||
336+
UDRE->getRefKind() != DeclRefKind::BinaryOperator)
337+
return false;
338+
339+
// Relex the token, to decide whether it has whitespace around it or not. If
340+
// it does, it isn't likely to be a case where a space was forgotten.
341+
auto tok = Lexer::getTokenAtLocation(TC.Context.SourceMgr, UDRE->getLoc());
342+
if (tok.getKind() != tok::oper_binary_unspaced)
343+
return false;
344+
345+
// Okay, we have a failed lookup of a multicharacter unspaced binary operator.
346+
// Check to see if lookup succeeds if a prefix or postfix operator is split
347+
// off, and record the matches found. The bool indicated is false if the
348+
// first half of the split is the unary operator (x!*4) or true if it is the
349+
// binary operator (x*+4).
350+
std::vector<std::pair<unsigned, bool>> WorkableSplits;
351+
352+
// Check all the potential splits.
353+
for (unsigned splitLoc = 1, e = nameStr.size(); splitLoc != e; ++splitLoc) {
354+
// For it to be a valid split, the start and end section must be valid
355+
// operators, spliting a unicode code point isn't kosher.
356+
auto startStr = nameStr.substr(0, splitLoc);
357+
auto endStr = nameStr.drop_front(splitLoc);
358+
if (!Lexer::isOperator(startStr) || !Lexer::isOperator(endStr))
359+
continue;
360+
361+
auto startName = TC.Context.getIdentifier(startStr);
362+
auto endName = TC.Context.getIdentifier(endStr);
363+
364+
// Perform name lookup for the first and second pieces. If either fail to
365+
// be found, then it isn't a valid split.
366+
NameLookupOptions LookupOptions = defaultUnqualifiedLookupOptions;
367+
if (isa<AbstractFunctionDecl>(DC))
368+
LookupOptions |= NameLookupFlags::KnownPrivate;
369+
auto startLookup = TC.lookupUnqualified(DC, startName, UDRE->getLoc(),
370+
LookupOptions);
371+
if (!startLookup) continue;
372+
auto endLookup = TC.lookupUnqualified(DC, endName, UDRE->getLoc(),
373+
LookupOptions);
374+
if (!endLookup) continue;
375+
376+
// Look to see if the candidates found could possibly match.
377+
if (containsDeclRefKind(startLookup, DeclRefKind::PostfixOperator) &&
378+
containsDeclRefKind(endLookup, DeclRefKind::BinaryOperator))
379+
WorkableSplits.push_back({ splitLoc, false });
380+
381+
if (containsDeclRefKind(startLookup, DeclRefKind::BinaryOperator) &&
382+
containsDeclRefKind(endLookup, DeclRefKind::PrefixOperator))
383+
WorkableSplits.push_back({ splitLoc, true });
384+
}
385+
386+
switch (WorkableSplits.size()) {
387+
case 0:
388+
// No splits found, can't produce this diagnostic.
389+
return false;
390+
case 1:
391+
// One candidate: produce an error with a fixit on it.
392+
diagnoseOperatorSplit(UDRE, WorkableSplits[0],
393+
diag::unspaced_operators_fixit, TC);
394+
return true;
395+
396+
default:
397+
// Otherwise, we have to produce a series of notes listing the various
398+
// options.
399+
TC.diagnose(UDRE->getLoc(), diag::unspaced_operators)
400+
.highlight(UDRE->getLoc());
401+
402+
for (auto candidateSplit : WorkableSplits)
403+
diagnoseOperatorSplit(UDRE, candidateSplit,
404+
diag::unspaced_operators_candidate, TC);
405+
return true;
406+
}
407+
}
408+
409+
284410
/// Bind an UnresolvedDeclRefExpr by performing name lookup and
285411
/// returning the resultant expression. Context is the DeclContext used
286412
/// for the lookup.
@@ -297,9 +423,14 @@ resolveDeclRefExpr(UnresolvedDeclRefExpr *UDRE, DeclContext *DC) {
297423
auto Lookup = lookupUnqualified(DC, Name, Loc, LookupOptions);
298424

299425
if (!Lookup) {
300-
diagnose(Loc, diag::use_unresolved_identifier, Name,
301-
UDRE->getName().isOperator())
302-
.highlight(Loc);
426+
// If we failed lookup of a binary operator, check to see it to see if
427+
// it is a binary operator juxtaposed with a unary operator (x*-4) that
428+
// needs whitespace. If so, emit specific diagnostics for it.
429+
if (!diagnoseJuxtaposedBinOp(UDRE, DC, *this)) {
430+
diagnose(Loc, diag::use_unresolved_identifier, Name,
431+
UDRE->getName().isOperator())
432+
.highlight(Loc);
433+
}
303434
return new (Context) ErrorExpr(Loc);
304435
}
305436

test/decl/operators.swift

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -172,10 +172,9 @@ func ??= <T>(inout result : T?, rhs : Int) { // ok
172172

173173

174174

175-
176-
_ = n*-4 // expected-error {{use of unresolved operator '*-'}}
177-
178-
if n==-1 {} // expected-error {{use of unresolved operator '==-'}}
175+
// <rdar://problem/14296004> [QoI] Poor diagnostic/recovery when two operators (e.g., == and -) are adjacted without spaces.
176+
_ = n*-4 // expected-error {{missing whitespace between '*' and '-' operators}} {{6-6= }} {{7-7= }}
177+
if n==-1 {} // expected-error {{missing whitespace between '==' and '-' operators}} {{5-5= }} {{7-7= }}
179178

180179
prefix operator ☃⃠ {}
181180
prefix func☃⃠(a : Int) -> Int { return a }
@@ -184,8 +183,10 @@ postfix func☃⃠(a : Int) -> Int { return a }
184183

185184
_ = n☃⃠ ☃⃠ n // Ok.
186185
_ = n ☃⃠ ☃⃠n // Ok.
187-
_ = n☃⃠☃⃠n // expected-error {{use of unresolved operator '☃⃠☃⃠'}}
188186
_ = n ☃⃠☃⃠ n // expected-error {{use of unresolved operator '☃⃠☃⃠'}}
187+
_ = n☃⃠☃⃠n // expected-error {{ambiguous missing whitespace between unary and binary operators}}
188+
// expected-note @-1 {{could be binary '☃⃠' and prefix '☃⃠'}} {{12-12= }} {{18-18= }}
189+
// expected-note @-2 {{could be postfix '☃⃠' and binary '☃⃠'}} {{6-6= }} {{12-12= }}
189190

190191

191192

0 commit comments

Comments
 (0)