Skip to content

[clang-format] Add an fnmatch-like function for .clang-format-ignore #76021

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/lib/Format/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_clang_library(clangFormat
IntegerLiteralSeparatorFixer.cpp
MacroCallReconstructor.cpp
MacroExpander.cpp
MatchFilePath.cpp
NamespaceEndCommentsFixer.cpp
ObjCPropertyAttributeOrderFixer.cpp
QualifierAlignmentFixer.cpp
Expand Down
122 changes: 122 additions & 0 deletions clang/lib/Format/MatchFilePath.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the functionality of matching a file path name to
/// a pattern, similar to the POSIX fnmatch() function.
///
//===----------------------------------------------------------------------===//

#include "MatchFilePath.h"

using namespace llvm;

namespace clang {
namespace format {

// Check whether `FilePath` matches `Pattern` based on POSIX (1003.1-2008)
// 2.13.1, 2.13.2, and Rule 1 of 2.13.3.
//
// Supported notation:
//   `\c`    matches the character `c` literally.
//   `?`     matches any single character except `/`.
//   `*`     matches any (possibly empty) run of characters other than `/`;
//           consecutive stars are equivalent to a single one.
//   `[...]` matches one character other than `/` that is listed (or, after a
//           leading `!`, not listed) between the brackets; `a-z` denotes a
//           range. A backslash is an ordinary character inside brackets. An
//           unpaired `[`, `[]`, a trailing `[!]`, and brackets containing `/`
//           are matched literally.
//
// Both `Pattern` and `FilePath` must be non-empty (asserted).
// Returns true if the whole of `FilePath` matches the whole of `Pattern`.
bool matchFilePath(StringRef Pattern, StringRef FilePath) {
  assert(!Pattern.empty());
  assert(!FilePath.empty());

  // No match if `Pattern` ends with a non-meta character not equal to the last
  // character of `FilePath`. This is a cheap early exit that also bounds the
  // amount of backtracking done for patterns with many stars.
  if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePath.back())
    return false;

  constexpr auto Separator = '/';
  const auto EOP = Pattern.size();  // End of `Pattern`.
  const auto End = FilePath.size(); // End of `FilePath`.
  unsigned I = 0;                   // Index to `Pattern`.

  for (unsigned J = 0; J < End; ++J) {
    // `Pattern` is exhausted but `FilePath` is not.
    if (I == EOP)
      return false;

    switch (const auto F = FilePath[J]; Pattern[I]) {
    case '\\':
      // The character after the backslash must match literally; a lone
      // trailing backslash never matches.
      if (++I == EOP || F != Pattern[I])
        return false;
      break;
    case '?':
      if (F == Separator)
        return false;
      break;
    case '*': {
      while (++I < EOP && Pattern[I] == '*') { // Skip consecutive stars.
      }
      const auto K = FilePath.find(Separator, J); // Index of next `Separator`.
      const bool NoMoreSeparatorsInFilePath = K == StringRef::npos;
      if (I == EOP) // `Pattern` ends with a star.
        return NoMoreSeparatorsInFilePath;
      // Start of the pattern remaining after the star(s), *including* any
      // escaping backslash. The recursion below must see the backslash so that
      // an escaped meta character (e.g. the `?` in `*\?`) stays literal.
      const auto Rest = I;
      // `Pattern` ends with a lone backslash.
      if (Pattern[I] == '\\' && ++I == EOP)
        return false;
      // The star is followed by a (possibly escaped) `Separator`.
      if (Pattern[I] == Separator) {
        if (NoMoreSeparatorsInFilePath)
          return false;
        J = K; // Skip to next `Separator` in `FilePath`.
        break;
      }
      // Recurse: try to match the rest of `Pattern` at every position the star
      // may extend to, i.e. up to (not including) the next `Separator`.
      for (auto Pat = Pattern.substr(Rest);
           J < End && FilePath[J] != Separator; ++J) {
        if (matchFilePath(Pat, FilePath.substr(J)))
          return true;
      }
      return false;
    }
    case '[':
      // Skip e.g. `[!]`.
      if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
        // Skip unpaired `[`, brackets containing slashes, and `[]`.
        if (const auto K = Pattern.find_first_of("]/", I + 1);
            K != StringRef::npos && Pattern[K] == ']' && K > I + 1) {
          if (F == Separator)
            return false;
          ++I; // After the `[`.
          bool Negated = false;
          if (Pattern[I] == '!') {
            Negated = true;
            ++I; // After the `!`.
          }
          bool Match = false;
          do {
            if (I + 2 < K && Pattern[I + 1] == '-') {
              Match = Pattern[I] <= F && F <= Pattern[I + 2];
              I += 3; // After the range, e.g. `A-Z`.
            } else {
              Match = F == Pattern[I++];
            }
          } while (!Match && I < K);
          if (Negated ? Match : !Match)
            return false;
          I = K + 1; // After the `]`.
          continue;  // `I` is already advanced; skip the `++I` below.
        }
      }
      [[fallthrough]]; // Match `[` literally.
    default:
      if (F != Pattern[I])
        return false;
    }

    ++I;
  }

  // Match trailing stars with null strings.
  while (I < EOP && Pattern[I] == '*')
    ++I;

  return I == EOP;
}

} // namespace format
} // namespace clang
22 changes: 22 additions & 0 deletions clang/lib/Format/MatchFilePath.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===--- MatchFilePath.h ----------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
#define LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H

#include "llvm/ADT/StringRef.h"

namespace clang {
namespace format {

/// Returns true if \p FilePath matches the fnmatch-like \p Pattern.
///
/// In \p Pattern, `?` matches one character and `*` matches any run of
/// characters, neither of them matching `/`; `[...]` matches one listed
/// character and a backslash escapes the character that follows it.
/// Both \p Pattern and \p FilePath must be non-empty.
bool matchFilePath(llvm::StringRef Pattern, llvm::StringRef FilePath);

} // end namespace format
} // end namespace clang

#endif
1 change: 1 addition & 0 deletions clang/unittests/Format/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ add_clang_unittest(FormatTests
IntegerLiteralSeparatorTest.cpp
MacroCallReconstructorTest.cpp
MacroExpanderTest.cpp
MatchFilePathTest.cpp
NamespaceEndCommentsFixerTest.cpp
ObjCPropertyAttributeOrderFixerTest.cpp
QualifierFixerTest.cpp
Expand Down
169 changes: 169 additions & 0 deletions clang/unittests/Format/MatchFilePathTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
//===- unittest/Format/MatchFilePathTest.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "../../lib/Format/MatchFilePath.h"
#include "gtest/gtest.h"

namespace clang {
namespace format {
namespace {

// Test fixture providing a `match()` helper that forwards to
// `matchFilePath()`.
class MatchFilePathTest : public ::testing::Test {
protected:
// Note the argument order: the file path comes first and the pattern second,
// the reverse of `matchFilePath(Pattern, FilePath)`.
bool match(llvm::StringRef FilePath, llvm::StringRef Pattern) {
return matchFilePath(Pattern, FilePath);
}
};

// Most of the test cases below are from:
// https://github.com/python/cpython/blob/main/Lib/test/test_fnmatch.py

// `?` and `*` wildcards, alone and next to bracket expressions.
TEST_F(MatchFilePathTest, Wildcard) {
  // A star may match the empty string; each `?` consumes exactly one
  // character.
  EXPECT_TRUE(match("abc", "*"));
  EXPECT_TRUE(match("abc", "???"));
  EXPECT_TRUE(match("abc", "*???"));
  EXPECT_TRUE(match("abc", "???*"));
  EXPECT_TRUE(match("abc", "?*?"));
  EXPECT_FALSE(match("a", "??"));

  // Bracket expressions, plain and negated.
  EXPECT_TRUE(match("abc", "ab[cd]"));
  EXPECT_TRUE(match("abc", "ab[!de]"));
  EXPECT_FALSE(match("abc", "ab[de]"));

  // An ordinary pattern character must equal the path character.
  EXPECT_FALSE(match("a", "b"));
}

// Backslash handling outside and inside bracket expressions.
TEST_F(MatchFilePathTest, Backslash) {
  // Outside brackets a backslash escapes the next pattern character, and a
  // lone trailing backslash never matches.
  EXPECT_TRUE(match("a?", R"(a\?)"));
  EXPECT_FALSE(match("a\\", R"(a\)"));

  // Inside brackets a backslash is an ordinary member of the set.
  EXPECT_TRUE(match("\\", R"([\])"));
  EXPECT_FALSE(match("\\", R"([!\])"));
  EXPECT_TRUE(match("a", R"([!\])"));
}

// Newlines in the path are ordinary characters that a star can match.
TEST_F(MatchFilePathTest, Newline) {
  constexpr auto FooStar = "foo*";

  EXPECT_TRUE(match("foo\nbar", FooStar));
  EXPECT_TRUE(match("foo\nbar\n", FooStar));
  // The pattern is anchored at the start of the path.
  EXPECT_FALSE(match("\nfoo", FooStar));

  EXPECT_TRUE(match("\n", "*"));
}

// A pattern with many stars, matched against a long run of 'a's.
TEST_F(MatchFilePathTest, Star) {
  constexpr auto ManyStars = "*a*a*a*a*a*a*a*a*a*a";
  const std::string FiftyAs(50, 'a');

  EXPECT_TRUE(match(FiftyAs, ManyStars));
  // A trailing 'b' cannot be matched by the final 'a' of the pattern.
  EXPECT_FALSE(match(FiftyAs + 'b', ManyStars));
}

// Matching compares characters exactly, so letter case matters.
TEST_F(MatchFilePathTest, CaseSensitive) {
  constexpr auto Lower = "abc";
  constexpr auto Mixed = "AbC";

  // Identical spelling matches.
  EXPECT_TRUE(match(Lower, Lower));
  EXPECT_TRUE(match(Mixed, Mixed));

  // Differing case does not, in either direction.
  EXPECT_FALSE(match(Mixed, Lower));
  EXPECT_FALSE(match(Lower, Mixed));
}

// Slashes and backslashes appearing literally in both path and pattern.
TEST_F(MatchFilePathTest, PathSeparators) {
  // An escaped backslash in the pattern matches one backslash in the path.
  EXPECT_TRUE(match("usr\\bin", R"(usr\\bin)"));
  // A slash in the pattern matches a slash in the path.
  EXPECT_TRUE(match("usr/bin", "usr/bin"));
}

// Characters written with escape sequences in the C++ source, including a
// byte outside the ASCII range.
TEST_F(MatchFilePathTest, NumericEscapeSequence) {
  EXPECT_TRUE(match("foo\nbar", "foo*"));
  EXPECT_TRUE(match("test", "te*"));
  // The star is followed by a literal 0xFF byte that ends the path.
  EXPECT_TRUE(match("test\xff", "te*\xff"));
}

// Well-formed bracket expressions.
TEST_F(MatchFilePathTest, ValidBrackets) {
  // Plain sets, including a repeated member and a `[` member.
  EXPECT_TRUE(match("z", "[az]"));
  EXPECT_TRUE(match("a", "[aa]"));
  EXPECT_TRUE(match("[", "[[az]"));
  // Only `!` negates; `^` is just another member of the set.
  EXPECT_TRUE(match("^", "[^az]"));

  // Negated sets.
  EXPECT_FALSE(match("z", "[!az]"));
  EXPECT_FALSE(match("]", "[!]]"));
}

// Patterns that do not form a bracket expression are matched literally.
TEST_F(MatchFilePathTest, InvalidBrackets) {
  // Unpaired opening brackets.
  EXPECT_TRUE(match("[", "["));
  EXPECT_TRUE(match("[!", "[!"));

  // Empty brackets, with and without `!`.
  EXPECT_TRUE(match("[]", "[]"));
  EXPECT_TRUE(match("[!]", "[!]"));
}

// Character ranges inside bracket expressions.
TEST_F(MatchFilePathTest, Range) {
  // One range, then two adjacent ranges.
  EXPECT_TRUE(match("c", "[b-d]"));
  EXPECT_TRUE(match("y", "[b-dx-z]"));

  // The same ranges, negated.
  EXPECT_FALSE(match("c", "[!b-d]"));
  EXPECT_FALSE(match("y", "[!b-dx-z]"));
}

// Hyphens as range operators and as literal set members.
TEST_F(MatchFilePathTest, Hyphen) {
  // Ranges whose end points are punctuation characters.
  EXPECT_TRUE(match("_", "[^-`]"));
  EXPECT_TRUE(match("]", "[[-^]"));
  EXPECT_TRUE(match("]", R"([\-^])"));
  EXPECT_FALSE(match("-", "[!--.]"));

  // A hyphen at the start or the end of the set is a literal member.
  EXPECT_TRUE(match("-", "[b-]"));
  EXPECT_TRUE(match("-", "[-b]"));
  EXPECT_TRUE(match("-", "[-]"));
  EXPECT_FALSE(match("-", "[!b-]"));
  EXPECT_FALSE(match("-", "[!-b]"));
  EXPECT_FALSE(match("-", "[!-]"));
  EXPECT_FALSE(match("#", "[!-#]"));
}

// Ranges whose upper bound is not greater than their lower bound.
TEST_F(MatchFilePathTest, UpperLELower) {
  // A reversed range such as `d-b` contains no characters.
  EXPECT_FALSE(match("c", "[d-b]"));
  EXPECT_TRUE(match("c", "[!d-b]"));

  // A reversed range followed by a proper range: the proper one still works.
  EXPECT_TRUE(match("y", "[d-bx-z]"));
  EXPECT_TRUE(match("_", "[d-b^-`]"));
  EXPECT_TRUE(match("]", "[d-b[-^]"));
  EXPECT_FALSE(match("y", "[!d-bx-z]"));

  // A range with equal bounds contains exactly that character.
  EXPECT_TRUE(match("b", "[b-b]"));
}

// Slashes and backslashes written between brackets.
TEST_F(MatchFilePathTest, SlashAndBackslashInBrackets) {
  // Brackets containing a slash are not a bracket expression; they only match
  // themselves literally.
  EXPECT_TRUE(match("[/]", "[/]"));
  EXPECT_FALSE(match("/", "[/]"));

  // A backslash inside brackets is an ordinary member, so `[\t]` is the set
  // of a backslash and the letter `t`, not a tab character.
  EXPECT_TRUE(match("\\", R"([\])"));
  EXPECT_TRUE(match("\\", R"([\t])"));
  EXPECT_TRUE(match("t", R"([\t])"));
  EXPECT_FALSE(match("\t", R"([\t])"));
}

// Slashes and backslashes as (or between) the end points of a range.
TEST_F(MatchFilePathTest, SlashAndBackslashInRange) {
  // A slash in the path is never matched by a bracket expression, even when
  // it lies inside the range.
  EXPECT_FALSE(match("a/b", "a[.-0]b"));

  // Brackets that contain a slash are matched literally instead.
  EXPECT_FALSE(match("a/b", "a[/-0]b"));
  EXPECT_FALSE(match("a/b", "a[.-/]b"));
  EXPECT_TRUE(match("a[/-0]b", "a[/-0]b"));
  EXPECT_TRUE(match("a[.-/]b", "a[.-/]b"));

  // A backslash may lie inside a range or be one of its end points.
  EXPECT_TRUE(match("a\\b", "a[Z-^]b"));
  EXPECT_TRUE(match("a\\b", R"(a[\-^]b)"));
  EXPECT_TRUE(match("a\\b", R"(a[Z-\]b)"));
}

// Assorted bracket expressions with punctuation members.
TEST_F(MatchFilePathTest, Brackets) {
  EXPECT_TRUE(match("[", "[[]"));

  // Doubled punctuation characters are plain (repeated) members.
  EXPECT_TRUE(match("&", "[a&&b]"));
  EXPECT_TRUE(match("|", "[a||b]"));
  EXPECT_TRUE(match("~", "[a~~b]"));

  // `+--` is the range from `+` to `-`, which contains the comma.
  EXPECT_TRUE(match(",", "[a-z+--A-Z]"));
  // The slash stops these brackets from being a bracket expression.
  EXPECT_FALSE(match(".", "[a-z--/A-Z]"));
}

// Wildcards in multi-component paths and in names starting with a dot.
TEST_F(MatchFilePathTest, Path) {
  constexpr auto FooBar = "foo/bar";

  // A leading dot gets no special treatment from the star.
  EXPECT_TRUE(match(".clang-format", "*"));
  EXPECT_TRUE(match(".git", "*git*"));
  EXPECT_TRUE(match(".gitignore", "*git*"));

  // The separator must be matched by a (possibly escaped) slash in the
  // pattern.
  EXPECT_TRUE(match(FooBar, "foo*/*bar"));
  EXPECT_TRUE(match(FooBar, "*/*"));
  EXPECT_TRUE(match(FooBar, R"(*foo*\/*bar*)"));

  // Neither `*` nor `?` matches the separator.
  EXPECT_FALSE(match(FooBar, "foo*"));
  EXPECT_FALSE(match(FooBar, "foo?bar"));
  EXPECT_FALSE(match(FooBar, "foo*bar"));

  // The pattern needs a separator that the path does not have.
  EXPECT_FALSE(match("foobar", "foo*/*"));
  // A star followed by a lone trailing backslash never matches.
  EXPECT_FALSE(match("foo\\", R"(foo*\)"));
}

} // namespace
} // namespace format
} // namespace clang