|
| 1 | +//===--- ConfusableIdentifierCheck.cpp - clang-tidy -----------------------===// |
| 3 | +// |
| 4 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | +// See https://llvm.org/LICENSE.txt for license information. |
| 6 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | +// |
| 8 | +//===----------------------------------------------------------------------===// |
| 9 | + |
| 10 | +#include "ConfusableIdentifierCheck.h" |
| 11 | + |
| 12 | +#include "clang/Frontend/CompilerInstance.h" |
| 13 | +#include "clang/Lex/Preprocessor.h" |
| 14 | +#include "llvm/Support/ConvertUTF.h" |
| 15 | + |
| 16 | +namespace { |
| 17 | +// Preprocessed version of |
| 18 | +// https://www.unicode.org/Public/security/latest/confusables.txt |
| 19 | +// |
| 20 | +// This contains a sorted array of { UTF32 codepoint; UTF32 values[N];} |
| 21 | +#include "Confusables.inc" |
| 22 | +} // namespace |
| 23 | + |
| 24 | +namespace clang { |
| 25 | +namespace tidy { |
| 26 | +namespace misc { |
| 27 | + |
| 28 | +ConfusableIdentifierCheck::ConfusableIdentifierCheck(StringRef Name, |
| 29 | + ClangTidyContext *Context) |
| 30 | + : ClangTidyCheck(Name, Context) {} |
| 31 | + |
| 32 | +ConfusableIdentifierCheck::~ConfusableIdentifierCheck() = default; |
| 33 | + |
| 34 | +// Build a skeleton out of the Original identifier, inspired by the algorithm |
| 35 | +// described in http://www.unicode.org/reports/tr39/#def-skeleton |
| 36 | +// |
| 37 | +// FIXME: TR39 mandates: |
| 38 | +// |
| 39 | +// For an input string X, define skeleton(X) to be the following transformation |
| 40 | +// on the string: |
| 41 | +// |
| 42 | +// 1. Convert X to NFD format, as described in [UAX15]. |
| 43 | +// 2. Concatenate the prototypes for each character in X according to the |
| 44 | +// specified data, producing a string of exemplar characters. |
| 45 | +// 3. Reapply NFD. |
| 46 | +// |
| 47 | +// We're skipping 1. and 3. for the sake of simplicity, but this can lead to |
| 48 | +// false positive. |
| 49 | + |
| 50 | +std::string ConfusableIdentifierCheck::skeleton(StringRef Name) { |
| 51 | + using namespace llvm; |
| 52 | + std::string SName = Name.str(); |
| 53 | + std::string Skeleton; |
| 54 | + Skeleton.reserve(1 + Name.size()); |
| 55 | + |
| 56 | + const char *Curr = SName.c_str(); |
| 57 | + const char *End = Curr + SName.size(); |
| 58 | + while (Curr < End) { |
| 59 | + |
| 60 | + const char *Prev = Curr; |
| 61 | + UTF32 CodePoint; |
| 62 | + ConversionResult Result = convertUTF8Sequence( |
| 63 | + reinterpret_cast<const UTF8 **>(&Curr), |
| 64 | + reinterpret_cast<const UTF8 *>(End), &CodePoint, strictConversion); |
| 65 | + if (Result != conversionOK) { |
| 66 | + errs() << "Unicode conversion issue\n"; |
| 67 | + break; |
| 68 | + } |
| 69 | + |
| 70 | + StringRef Key(Prev, Curr - Prev); |
| 71 | + auto Where = std::lower_bound(std::begin(ConfusableEntries), |
| 72 | + std::end(ConfusableEntries), CodePoint, |
| 73 | + [](decltype(ConfusableEntries[0]) x, |
| 74 | + UTF32 y) { return x.codepoint < y; }); |
| 75 | + if (Where == std::end(ConfusableEntries) || CodePoint != Where->codepoint) { |
| 76 | + Skeleton.append(Prev, Curr); |
| 77 | + } else { |
| 78 | + UTF8 Buffer[32]; |
| 79 | + UTF8 *BufferStart = std::begin(Buffer); |
| 80 | + UTF8 *IBuffer = BufferStart; |
| 81 | + const UTF32 *ValuesStart = std::begin(Where->values); |
| 82 | + const UTF32 *ValuesEnd = |
| 83 | + std::find(std::begin(Where->values), std::end(Where->values), '\0'); |
| 84 | + if (ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer, |
| 85 | + std::end(Buffer), |
| 86 | + strictConversion) != conversionOK) { |
| 87 | + errs() << "Unicode conversion issue\n"; |
| 88 | + break; |
| 89 | + } |
| 90 | + Skeleton.append((char *)BufferStart, (char *)IBuffer); |
| 91 | + } |
| 92 | + } |
| 93 | + return Skeleton; |
| 94 | +} |
| 95 | + |
| 96 | +void ConfusableIdentifierCheck::check( |
| 97 | + const ast_matchers::MatchFinder::MatchResult &Result) { |
| 98 | + if (const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl")) { |
| 99 | + if (IdentifierInfo *II = ND->getIdentifier()) { |
| 100 | + StringRef NDName = II->getName(); |
| 101 | + llvm::SmallVector<const NamedDecl *> &Mapped = Mapper[skeleton(NDName)]; |
| 102 | + const DeclContext *NDDecl = ND->getDeclContext(); |
| 103 | + for (const NamedDecl *OND : Mapped) { |
| 104 | + if (!NDDecl->isDeclInLexicalTraversal(OND) && |
| 105 | + !OND->getDeclContext()->isDeclInLexicalTraversal(ND)) |
| 106 | + continue; |
| 107 | + if (OND->getIdentifier()->getName() != NDName) { |
| 108 | + diag(OND->getLocation(), "%0 is confusable with %1") |
| 109 | + << OND->getName() << NDName; |
| 110 | + diag(ND->getLocation(), "other declaration found here", |
| 111 | + DiagnosticIDs::Note); |
| 112 | + } |
| 113 | + } |
| 114 | + Mapped.push_back(ND); |
| 115 | + } |
| 116 | + } |
| 117 | +} |
| 118 | + |
| 119 | +void ConfusableIdentifierCheck::registerMatchers( |
| 120 | + ast_matchers::MatchFinder *Finder) { |
| 121 | + Finder->addMatcher(ast_matchers::namedDecl().bind("nameddecl"), this); |
| 122 | +} |
| 123 | + |
| 124 | +} // namespace misc |
| 125 | +} // namespace tidy |
| 126 | +} // namespace clang |
0 commit comments