swiftlang
diff --git a/‎clang-tools-extra/clang-tidy/misc/CMakeLists.txt
Lines changed: 20 additions & 0 deletions b/‎clang-tools-extra/clang-tidy/misc/CMakeLists.txt
Lines changed: 20 additions & 0 deletions
diff --git a/‎clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp
Lines changed: 126 additions & 0 deletions b/‎clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp
Lines changed: 126 additions & 0 deletions
diff --git a/‎clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h
Lines changed: 40 additions & 0 deletions b/‎clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h
Lines changed: 40 additions & 0 deletions
diff --git a/‎clang-tools-extra/clang-tidy/misc/ConfusableTable/BuildConfusableTable.cpp
Lines changed: 85 additions & 0 deletions b/‎clang-tools-extra/clang-tidy/misc/ConfusableTable/BuildConfusableTable.cpp
Lines changed: 85 additions & 0 deletions
diff --git a/‎clang-tools-extra/clang-tidy/misc/ConfusableTable/CMakeLists.txt
Lines changed: 1 addition & 0 deletions b/‎clang-tools-extra/clang-tidy/misc/ConfusableTable/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
@@ -3,8 +3,27 @@ set(LLVM_LINK_COMPONENTS
   Support
   )
 
+# Select how the table generator is invoked. With LLVM_USE_HOST_TOOLS the
+# executable is obtained through build_native_tool(); otherwise the
+# make-confusable-table target from the ConfusableTable subdirectory is used
+# directly (its path is resolved via a generator expression).
+if(LLVM_USE_HOST_TOOLS)
+  build_native_tool(make-confusable-table make_confusable_table)
+  set(make_confusable_table_target "${make_confusable_table}")
+else()
+  set(make_confusable_table $<TARGET_FILE:make-confusable-table>)
+  set(make_confusable_table_target make-confusable-table)
+endif()
+
+# Declares the make-confusable-table executable.
+add_subdirectory(ConfusableTable)
+
+
+# Run the generator on ConfusableTable/confusables.txt to produce
+# Confusables.inc in the current binary directory. The rule is re-run when
+# either the generator or the input data file changes.
+add_custom_command(
+    OUTPUT Confusables.inc
+    COMMAND ${make_confusable_table} ${CMAKE_CURRENT_SOURCE_DIR}/ConfusableTable/confusables.txt ${CMAKE_CURRENT_BINARY_DIR}/Confusables.inc
+    DEPENDS ${make_confusable_table_target} ConfusableTable/confusables.txt)
+
+# Named target wrapping the generated file so that other targets can list it
+# under DEPENDS.
+add_custom_target(genconfusable DEPENDS Confusables.inc)
+
 add_clang_library(clangTidyMiscModule
   DefinitionsInHeadersCheck.cpp
+  ConfusableIdentifierCheck.cpp
   MiscTidyModule.cpp
   MisleadingBidirectional.cpp
   MisleadingIdentifier.cpp
@@ -28,6 +47,7 @@ add_clang_library(clangTidyMiscModule
 
   DEPENDS
   omp_gen
+  genconfusable
   )
 
 clang_target_link_libraries(clangTidyMiscModule
 
@@ -0,0 +1,126 @@
+//===--- ConfusableIdentifierCheck.cpp - clang-tidy -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConfusableIdentifierCheck.h"
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/ConvertUTF.h"
+
+namespace {
+// Preprocessed version of
+// https://www.unicode.org/Public/security/latest/confusables.txt
+//
+// This contains a sorted array of { UTF32 codepoint; UTF32 values[N];}
+#include "Confusables.inc"
+} // namespace
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+// Forwards the check name and the clang-tidy context to the ClangTidyCheck
+// base class; no additional state is set up here.
+ConfusableIdentifierCheck::ConfusableIdentifierCheck(StringRef Name,
+                                                     ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context) {}
+
+// Destructor is declared in the header and defaulted here, out of line.
+ConfusableIdentifierCheck::~ConfusableIdentifierCheck() = default;
+
+// Build a skeleton out of the Original identifier, inspired by the algorithm
+// described in http://www.unicode.org/reports/tr39/#def-skeleton
+//
+// FIXME: TR39 mandates:
+//
+// For an input string X, define skeleton(X) to be the following transformation
+// on the string:
+//
+// 1. Convert X to NFD format, as described in [UAX15].
+// 2. Concatenate the prototypes for each character in X according to the
+// specified data, producing a string of exemplar characters.
+// 3. Reapply NFD.
+//
+// We're skipping 1. and 3. for the sake of simplicity, but this can lead to
+// false positives.
+
+// Returns the skeleton of the UTF-8 encoded identifier \p Name.
+//
+// Each code point of \p Name is looked up in the sorted ConfusableEntries
+// table. A code point found there is replaced by the UTF-8 encoding of its
+// prototype sequence; any other code point is copied through unchanged.
+//
+// If \p Name contains an invalid UTF-8 sequence, or a prototype cannot be
+// re-encoded, a message is printed to errs() and the skeleton built so far is
+// returned.
+std::string ConfusableIdentifierCheck::skeleton(StringRef Name) {
+  using namespace llvm;
+  std::string Skeleton;
+  Skeleton.reserve(1 + Name.size());
+
+  // Decode straight from the StringRef's storage: the decoder is bounded by
+  // End, so no null-terminated copy of the name is needed.
+  const char *Curr = Name.data();
+  const char *End = Curr + Name.size();
+  while (Curr < End) {
+    const char *Prev = Curr;
+    UTF32 CodePoint = 0;
+    // Advances Curr past exactly one UTF-8 sequence on success.
+    const ConversionResult Result = convertUTF8Sequence(
+        reinterpret_cast<const UTF8 **>(&Curr),
+        reinterpret_cast<const UTF8 *>(End), &CodePoint, strictConversion);
+    if (Result != conversionOK) {
+      errs() << "Unicode conversion issue\n";
+      break;
+    }
+
+    // Binary search: the table is sorted by code point.
+    auto Where = std::lower_bound(std::begin(ConfusableEntries),
+                                  std::end(ConfusableEntries), CodePoint,
+                                  [](decltype(ConfusableEntries[0]) x,
+                                     UTF32 y) { return x.codepoint < y; });
+    if (Where == std::end(ConfusableEntries) || CodePoint != Where->codepoint) {
+      // Not a confusable character: keep its original bytes.
+      Skeleton.append(Prev, Curr);
+      continue;
+    }
+
+    // Size the scratch buffer for the worst case (every slot of the entry used,
+    // each code point taking the maximum number of UTF-8 bytes) so that the
+    // conversion cannot run out of space; a fixed small buffer is not
+    // guaranteed to hold the longest prototype sequence in the table.
+    UTF8 Buffer[(sizeof(Where->values) / sizeof(Where->values[0])) *
+                UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
+    UTF8 *BufferStart = std::begin(Buffer);
+    UTF8 *IBuffer = BufferStart;
+    const UTF32 *ValuesStart = std::begin(Where->values);
+    // The prototype sequence is terminated by a zero sentinel.
+    const UTF32 *ValuesEnd = std::find(std::begin(Where->values),
+                                       std::end(Where->values), UTF32(0));
+    if (ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer, std::end(Buffer),
+                           strictConversion) != conversionOK) {
+      errs() << "Unicode conversion issue\n";
+      break;
+    }
+    Skeleton.append(reinterpret_cast<const char *>(BufferStart),
+                    reinterpret_cast<const char *>(IBuffer));
+  }
+  return Skeleton;
+}
+
+// Handles one matched named declaration. Declarations are grouped by the
+// skeleton of their identifier; the new declaration is compared against every
+// earlier declaration in the same group and a diagnostic (plus a note) is
+// emitted for each one that has a different spelling and for which at least one
+// of the two declaration contexts reports the other declaration through
+// isDeclInLexicalTraversal(). The declaration is then added to its group.
+void ConfusableIdentifierCheck::check(
+    const ast_matchers::MatchFinder::MatchResult &Result) {
+  const auto *Current = Result.Nodes.getNodeAs<NamedDecl>("nameddecl");
+  if (!Current)
+    return;
+
+  // Declarations without a plain identifier are ignored.
+  IdentifierInfo *Ident = Current->getIdentifier();
+  if (!Ident)
+    return;
+
+  StringRef CurrentName = Ident->getName();
+  llvm::SmallVector<const NamedDecl *> &SameSkeleton =
+      Mapper[skeleton(CurrentName)];
+  const DeclContext *CurrentContext = Current->getDeclContext();
+
+  for (const NamedDecl *Earlier : SameSkeleton) {
+    const bool ContextsRelated =
+        CurrentContext->isDeclInLexicalTraversal(Earlier) ||
+        Earlier->getDeclContext()->isDeclInLexicalTraversal(Current);
+    if (!ContextsRelated)
+      continue;
+
+    // Identical spellings are the same name, not a confusable pair.
+    if (Earlier->getIdentifier()->getName() == CurrentName)
+      continue;
+
+    diag(Earlier->getLocation(), "%0 is confusable with %1")
+        << Earlier->getName() << CurrentName;
+    diag(Current->getLocation(), "other declaration found here",
+         DiagnosticIDs::Note);
+  }
+
+  SameSkeleton.push_back(Current);
+}
+
+// Registers a single matcher that binds every named declaration under the id
+// "nameddecl", which check() reads back.
+void ConfusableIdentifierCheck::registerMatchers(
+    ast_matchers::MatchFinder *Finder) {
+  const ast_matchers::DeclarationMatcher AnyNamedDecl =
+      ast_matchers::namedDecl().bind("nameddecl");
+  Finder->addMatcher(AnyNamedDecl, this);
+}
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
@@ -0,0 +1,40 @@
+//===--- ConfusableIdentifierCheck.h - clang-tidy ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLE_IDENTIFIER_CHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLE_IDENTIFIER_CHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+/// Finds symbols that have confusable identifiers, i.e. identifiers that look
+/// the same visually but have a different Unicode representation.
+/// If symbols are confusable but don't live in conflicting namespaces, they are
+/// not reported.
+class ConfusableIdentifierCheck : public ClangTidyCheck {
+public:
+  /// Creates the check with the given registered name and clang-tidy context.
+  ConfusableIdentifierCheck(StringRef Name, ClangTidyContext *Context);
+  ~ConfusableIdentifierCheck();
+
+  /// Registers the AST matcher whose results are delivered to check().
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  /// Processes one match result and emits diagnostics for confusable names.
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+
+private:
+  /// Computes the skeleton string of an identifier; identifiers with equal
+  /// skeletons are treated as candidates for being confusable.
+  std::string skeleton(StringRef);
+  /// Maps a skeleton to the declarations seen so far whose identifier has that
+  /// skeleton.
+  llvm::StringMap<llvm::SmallVector<const NamedDecl *>> Mapper;
+};
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLE_IDENTIFIER_CHECK_H
@@ -0,0 +1,85 @@
+//===--- BuildConfusableTable.cpp - clang-tidy---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+
+using namespace llvm;
+
+// Converts the Unicode confusables data file into a C++ table.
+//
+// Usage: make-confusable-table <confusables.txt> <output.inc>
+//
+// Each data line has the form "<from> ; <to> [<to>...] ; ..." with hexadecimal
+// code points; lines starting with '#' are comments. The output defines a
+// `ConfusableEntries` array sorted by source code point, where every entry
+// holds the source code point and its zero-terminated replacement sequence.
+//
+// Returns 0 on success, 1 when arguments are missing or a file cannot be
+// opened, and 2 when the input cannot be parsed.
+int main(int argc, char *argv[]) {
+  if (argc < 3) {
+    errs() << "Usage: " << (argc > 0 ? argv[0] : "make-confusable-table")
+           << " <confusables.txt> <output.inc>\n";
+    return 1;
+  }
+
+  auto ErrorOrBuffer = MemoryBuffer::getFile(argv[1], true);
+  if (!ErrorOrBuffer) {
+    errs() << "Failed to read " << argv[1] << ": "
+           << ErrorOrBuffer.getError().message() << "\n";
+    return 1;
+  }
+  std::unique_ptr<MemoryBuffer> Buffer = std::move(ErrorOrBuffer.get());
+  StringRef Content = Buffer->getBuffer();
+  // Skip anything (e.g. a byte order mark) that precedes the first comment.
+  Content = Content.drop_until([](char c) { return c == '#'; });
+  SmallVector<StringRef> Lines;
+  SplitString(Content, Lines, "\r\n");
+
+  std::vector<std::pair<llvm::UTF32, SmallVector<llvm::UTF32>>> Entries;
+  SmallVector<StringRef> Values;
+  for (StringRef Line : Lines) {
+    if (Line.startswith("#"))
+      continue;
+
+    Values.clear();
+    Line.split(Values, ';');
+    if (Values.size() < 2) {
+      errs() << "Failed to parse: " << Line << "\n";
+      return 2;
+    }
+
+    llvm::StringRef From = Values[0].trim();
+    llvm::UTF32 CodePoint = 0;
+    // getAsInteger() returns true on failure.
+    if (From.getAsInteger(16, CodePoint)) {
+      errs() << "Failed to parse: " << Line << "\n";
+      return 2;
+    }
+
+    SmallVector<llvm::UTF32> To;
+    SmallVector<StringRef> ToN;
+    Values[1].split(ToN, ' ', -1, false);
+    for (StringRef To_ : ToN) {
+      llvm::UTF32 ToCodePoint = 0;
+      if (To_.trim().getAsInteger(16, ToCodePoint)) {
+        errs() << "Failed to parse: " << Line << "\n";
+        return 2;
+      }
+      To.push_back(ToCodePoint);
+    }
+    // Sentinel
+    To.push_back(0);
+
+    Entries.emplace_back(CodePoint, To);
+  }
+
+  // An empty table would leave nothing to size the array with (and would
+  // produce an invalid zero-length initializer), so reject it up front.
+  if (Entries.empty()) {
+    errs() << "Failed to parse: no entries found in " << argv[1] << "\n";
+    return 2;
+  }
+  std::sort(Entries.begin(), Entries.end());
+
+  // Every entry is emitted into a fixed-size array, so find the longest
+  // replacement sequence (sentinel included).
+  unsigned LargestValue =
+      std::max_element(Entries.begin(), Entries.end(),
+                       [](const auto &Entry0, const auto &Entry1) {
+                         return Entry0.second.size() < Entry1.second.size();
+                       })
+          ->second.size();
+
+  std::error_code ec;
+  llvm::raw_fd_ostream os(argv[2], ec);
+  if (ec) {
+    errs() << "Failed to open " << argv[2] << ": " << ec.message() << "\n";
+    return 1;
+  }
+
+  // FIXME: If memory consumption and/or lookup time becomes a constraint, it
+  // maybe worth using a more elaborate data structure.
+  os << "struct {llvm::UTF32 codepoint; llvm::UTF32 values[" << LargestValue
+     << "];} "
+        "ConfusableEntries[] = {\n";
+  for (const auto &Entry : Entries) {
+    os << "  { ";
+    os << Entry.first;
+    os << ", {";
+    for (auto CP : Entry.second)
+      os << CP << ", ";
+
+    os << "}},\n";
+  }
+  os << "};\n";
+  return 0;
+}
@@ -0,0 +1 @@
+# Declares the make-confusable-table executable, built from
+# BuildConfusableTable.cpp.
+add_llvm_executable(make-confusable-table BuildConfusableTable.cpp)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+add_llvm_executable(make-confusable-table BuildConfusableTable.cpp)`