Skip to content

Commit 8f9803b

Browse files
authored
[clang-format] Add an fnmatch-like function for .clang-format-ignore (#76021)
This is needed because Windows doesn't have anything equivalent to the POSIX fnmatch() function.
1 parent 1dc715a commit 8f9803b

File tree

5 files changed

+315
-0
lines changed

5 files changed

+315
-0
lines changed

clang/lib/Format/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ add_clang_library(clangFormat
1111
IntegerLiteralSeparatorFixer.cpp
1212
MacroCallReconstructor.cpp
1313
MacroExpander.cpp
14+
MatchFilePath.cpp
1415
NamespaceEndCommentsFixer.cpp
1516
ObjCPropertyAttributeOrderFixer.cpp
1617
QualifierAlignmentFixer.cpp

clang/lib/Format/MatchFilePath.cpp

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
//===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file implements the functionality of matching a file path name to
11+
/// a pattern, similar to the POSIX fnmatch() function.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "MatchFilePath.h"
16+
17+
using namespace llvm;
18+
19+
namespace clang {
20+
namespace format {
21+
22+
// Check whether `FilePath` matches `Pattern` based on POSIX (1003.1-2008)
23+
// 2.13.1, 2.13.2, and Rule 1 of 2.13.3.
24+
bool matchFilePath(StringRef Pattern, StringRef FilePath) {
25+
assert(!Pattern.empty());
26+
assert(!FilePath.empty());
27+
28+
// No match if `Pattern` ends with a non-meta character not equal to the last
29+
// character of `FilePath`.
30+
if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePath.back())
31+
return false;
32+
33+
constexpr auto Separator = '/';
34+
const auto EOP = Pattern.size(); // End of `Pattern`.
35+
const auto End = FilePath.size(); // End of `FilePath`.
36+
unsigned I = 0; // Index to `Pattern`.
37+
38+
for (unsigned J = 0; J < End; ++J) {
39+
if (I == EOP)
40+
return false;
41+
42+
switch (const auto F = FilePath[J]; Pattern[I]) {
43+
case '\\':
44+
if (++I == EOP || F != Pattern[I])
45+
return false;
46+
break;
47+
case '?':
48+
if (F == Separator)
49+
return false;
50+
break;
51+
case '*': {
52+
while (++I < EOP && Pattern[I] == '*') { // Skip consecutive stars.
53+
}
54+
const auto K = FilePath.find(Separator, J); // Index of next `Separator`.
55+
const bool NoMoreSeparatorsInFilePath = K == StringRef::npos;
56+
if (I == EOP) // `Pattern` ends with a star.
57+
return NoMoreSeparatorsInFilePath;
58+
// `Pattern` ends with a lone backslash.
59+
if (Pattern[I] == '\\' && ++I == EOP)
60+
return false;
61+
// The star is followed by a (possibly escaped) `Separator`.
62+
if (Pattern[I] == Separator) {
63+
if (NoMoreSeparatorsInFilePath)
64+
return false;
65+
J = K; // Skip to next `Separator` in `FilePath`.
66+
break;
67+
}
68+
// Recurse.
69+
for (auto Pat = Pattern.substr(I); J < End && FilePath[J] != Separator;
70+
++J) {
71+
if (matchFilePath(Pat, FilePath.substr(J)))
72+
return true;
73+
}
74+
return false;
75+
}
76+
case '[':
77+
// Skip e.g. `[!]`.
78+
if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
79+
// Skip unpaired `[`, brackets containing slashes, and `[]`.
80+
if (const auto K = Pattern.find_first_of("]/", I + 1);
81+
K != StringRef::npos && Pattern[K] == ']' && K > I + 1) {
82+
if (F == Separator)
83+
return false;
84+
++I; // After the `[`.
85+
bool Negated = false;
86+
if (Pattern[I] == '!') {
87+
Negated = true;
88+
++I; // After the `!`.
89+
}
90+
bool Match = false;
91+
do {
92+
if (I + 2 < K && Pattern[I + 1] == '-') {
93+
Match = Pattern[I] <= F && F <= Pattern[I + 2];
94+
I += 3; // After the range, e.g. `A-Z`.
95+
} else {
96+
Match = F == Pattern[I++];
97+
}
98+
} while (!Match && I < K);
99+
if (Negated ? Match : !Match)
100+
return false;
101+
I = K + 1; // After the `]`.
102+
continue;
103+
}
104+
}
105+
[[fallthrough]]; // Match `[` literally.
106+
default:
107+
if (F != Pattern[I])
108+
return false;
109+
}
110+
111+
++I;
112+
}
113+
114+
// Match trailing stars with null strings.
115+
while (I < EOP && Pattern[I] == '*')
116+
++I;
117+
118+
return I == EOP;
119+
}
120+
121+
} // namespace format
122+
} // namespace clang

clang/lib/Format/MatchFilePath.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===--- MatchFilePath.h ----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
10+
#define LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
11+
12+
#include "llvm/ADT/StringRef.h"
13+
14+
namespace clang {
15+
namespace format {
16+
17+
/// Returns true if `FilePath` matches the glob-style `Pattern`, similar to the
/// POSIX fnmatch() function. Neither argument may be empty (the definition
/// asserts this).
bool matchFilePath(llvm::StringRef Pattern, llvm::StringRef FilePath);
18+
19+
} // end namespace format
20+
} // end namespace clang
21+
22+
#endif

clang/unittests/Format/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ add_clang_unittest(FormatTests
2727
IntegerLiteralSeparatorTest.cpp
2828
MacroCallReconstructorTest.cpp
2929
MacroExpanderTest.cpp
30+
MatchFilePathTest.cpp
3031
NamespaceEndCommentsFixerTest.cpp
3132
ObjCPropertyAttributeOrderFixerTest.cpp
3233
QualifierFixerTest.cpp
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
//===- unittest/Format/MatchFilePathTest.cpp ------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "../../lib/Format/MatchFilePath.h"
10+
#include "gtest/gtest.h"
11+
12+
namespace clang {
13+
namespace format {
14+
namespace {
15+
16+
// Test fixture providing a small convenience wrapper around matchFilePath().
class MatchFilePathTest : public ::testing::Test {
17+
protected:
18+
// Forwards to matchFilePath() with the arguments swapped, so that test cases
// read as match(<file path>, <pattern>).
bool match(llvm::StringRef FilePath, llvm::StringRef Pattern) {
19+
return matchFilePath(Pattern, FilePath);
20+
}
21+
};
22+
23+
// Most of the test cases below are from:
24+
// https://github.com/python/cpython/blob/main/Lib/test/test_fnmatch.py
25+
26+
// Basic `?`, `*`, and bracket-expression matching on names that contain no
// path separator.
TEST_F(MatchFilePathTest, Wildcard) {
27+
EXPECT_TRUE(match("abc", "?*?"));
28+
EXPECT_TRUE(match("abc", "???*"));
29+
EXPECT_TRUE(match("abc", "*???"));
30+
EXPECT_TRUE(match("abc", "???"));
31+
EXPECT_TRUE(match("abc", "*"));
32+
EXPECT_TRUE(match("abc", "ab[cd]"));
33+
EXPECT_TRUE(match("abc", "ab[!de]"));
34+
EXPECT_FALSE(match("abc", "ab[de]"));
35+
EXPECT_FALSE(match("a", "??"));
36+
EXPECT_FALSE(match("a", "b"));
37+
}
38+
39+
// Outside brackets a backslash escapes the next pattern character and a lone
// trailing backslash never matches; inside brackets a backslash is an ordinary
// set member.
TEST_F(MatchFilePathTest, Backslash) {
40+
EXPECT_TRUE(match("a?", R"(a\?)"));
41+
EXPECT_FALSE(match("a\\", R"(a\)"));
42+
EXPECT_TRUE(match("\\", R"([\])"));
43+
EXPECT_TRUE(match("a", R"([!\])"));
44+
EXPECT_FALSE(match("\\", R"([!\])"));
45+
}
46+
47+
// Newline characters in the path are ordinary characters that `*` can match.
TEST_F(MatchFilePathTest, Newline) {
48+
EXPECT_TRUE(match("foo\nbar", "foo*"));
49+
EXPECT_TRUE(match("foo\nbar\n", "foo*"));
50+
EXPECT_FALSE(match("\nfoo", "foo*"));
51+
EXPECT_TRUE(match("\n", "*"));
52+
}
53+
54+
// A pattern with many stars interleaved with literals, applied to a
// 50-character run of `a` (with and without a trailing `b`).
TEST_F(MatchFilePathTest, Star) {
55+
EXPECT_TRUE(match(std::string(50, 'a'), "*a*a*a*a*a*a*a*a*a*a"));
56+
EXPECT_FALSE(match((std::string(50, 'a') + 'b'), "*a*a*a*a*a*a*a*a*a*a"));
57+
}
58+
59+
// Literal characters are compared case-sensitively.
TEST_F(MatchFilePathTest, CaseSensitive) {
60+
EXPECT_TRUE(match("abc", "abc"));
61+
EXPECT_FALSE(match("AbC", "abc"));
62+
EXPECT_FALSE(match("abc", "AbC"));
63+
EXPECT_TRUE(match("AbC", "AbC"));
64+
}
65+
66+
// A literal `/` in the pattern matches `/` in the path, and an escaped
// backslash in the pattern matches a single backslash in the path.
TEST_F(MatchFilePathTest, PathSeparators) {
67+
EXPECT_TRUE(match("usr/bin", "usr/bin"));
68+
EXPECT_TRUE(match("usr\\bin", R"(usr\\bin)"));
69+
}
70+
71+
// Characters written with escape sequences in the C++ source (a 0xff byte, a
// newline) are handled like any other character.
TEST_F(MatchFilePathTest, NumericEscapeSequence) {
72+
EXPECT_TRUE(match("test", "te*"));
73+
EXPECT_TRUE(match("test\xff", "te*\xff"));
74+
EXPECT_TRUE(match("foo\nbar", "foo*"));
75+
}
76+
77+
// Well-formed bracket expressions: a leading `!` negates the set, while `^`
// and `[` are ordinary set members.
TEST_F(MatchFilePathTest, ValidBrackets) {
78+
EXPECT_TRUE(match("z", "[az]"));
79+
EXPECT_FALSE(match("z", "[!az]"));
80+
EXPECT_TRUE(match("a", "[aa]"));
81+
EXPECT_TRUE(match("^", "[^az]"));
82+
EXPECT_TRUE(match("[", "[[az]"));
83+
EXPECT_FALSE(match("]", "[!]]"));
84+
}
85+
86+
// Malformed bracket expressions (`[`, `[]`, `[!`, `[!]`) match the identical
// literal text.
TEST_F(MatchFilePathTest, InvalidBrackets) {
87+
EXPECT_TRUE(match("[", "["));
88+
EXPECT_TRUE(match("[]", "[]"));
89+
EXPECT_TRUE(match("[!", "[!"));
90+
EXPECT_TRUE(match("[!]", "[!]"));
91+
}
92+
93+
// Character ranges inside brackets, including several ranges in one set and
// negated ranges.
TEST_F(MatchFilePathTest, Range) {
94+
EXPECT_TRUE(match("c", "[b-d]"));
95+
EXPECT_FALSE(match("c", "[!b-d]"));
96+
EXPECT_TRUE(match("y", "[b-dx-z]"));
97+
EXPECT_FALSE(match("y", "[!b-dx-z]"));
98+
}
99+
100+
// Hyphens inside brackets: as a literal member when first or last in the set,
// and as a range operator, including ranges that start at `-`, `^`, `[`, or a
// backslash.
TEST_F(MatchFilePathTest, Hyphen) {
101+
EXPECT_FALSE(match("#", "[!-#]"));
102+
EXPECT_FALSE(match("-", "[!--.]"));
103+
EXPECT_TRUE(match("_", "[^-`]"));
104+
EXPECT_TRUE(match("]", "[[-^]"));
105+
EXPECT_TRUE(match("]", R"([\-^])"));
106+
EXPECT_TRUE(match("-", "[b-]"));
107+
EXPECT_FALSE(match("-", "[!b-]"));
108+
EXPECT_TRUE(match("-", "[-b]"));
109+
EXPECT_FALSE(match("-", "[!-b]"));
110+
EXPECT_TRUE(match("-", "[-]"));
111+
EXPECT_FALSE(match("-", "[!-]"));
112+
}
113+
114+
// Ranges whose upper bound sorts before the lower bound (e.g. `d-b`) match
// nothing, other members of the same set still apply, and a single-character
// range such as `b-b` matches that character.
TEST_F(MatchFilePathTest, UpperLELower) {
115+
EXPECT_FALSE(match("c", "[d-b]"));
116+
EXPECT_TRUE(match("c", "[!d-b]"));
117+
EXPECT_TRUE(match("y", "[d-bx-z]"));
118+
EXPECT_FALSE(match("y", "[!d-bx-z]"));
119+
EXPECT_TRUE(match("_", "[d-b^-`]"));
120+
EXPECT_TRUE(match("]", "[d-b[-^]"));
121+
EXPECT_TRUE(match("b", "[b-b]"));
122+
}
123+
124+
// Brackets containing `/` are not a set: `[/]` only matches the literal text
// `[/]`. A backslash inside brackets is a plain member, so the raw-string
// pattern `[\t]` matches a backslash or `t` but not a tab character.
TEST_F(MatchFilePathTest, SlashAndBackslashInBrackets) {
125+
EXPECT_FALSE(match("/", "[/]"));
126+
EXPECT_TRUE(match("\\", R"([\])"));
127+
EXPECT_TRUE(match("[/]", "[/]"));
128+
EXPECT_TRUE(match("\\", R"([\t])"));
129+
EXPECT_TRUE(match("t", R"([\t])"));
130+
EXPECT_FALSE(match("\t", R"([\t])"));
131+
}
132+
133+
// A range that spans `/` (e.g. `.-0`) still never matches `/`, and a range
// with `/` as an endpoint makes the brackets match only as literal text.
// A backslash may lie inside a range or be one of its endpoints.
TEST_F(MatchFilePathTest, SlashAndBackslashInRange) {
134+
EXPECT_FALSE(match("a/b", "a[.-0]b"));
135+
EXPECT_TRUE(match("a\\b", "a[Z-^]b"));
136+
EXPECT_FALSE(match("a/b", "a[/-0]b"));
137+
EXPECT_TRUE(match("a[/-0]b", "a[/-0]b"));
138+
EXPECT_FALSE(match("a/b", "a[.-/]b"));
139+
EXPECT_TRUE(match("a[.-/]b", "a[.-/]b"));
140+
EXPECT_TRUE(match("a\\b", R"(a[\-^]b)"));
141+
EXPECT_TRUE(match("a\\b", R"(a[Z-\]b)"));
142+
}
143+
144+
// Miscellaneous sets: `[` as a member, repeated punctuation members, and
// hyphen ranges mixed with letter ranges. The last pattern contains `/` inside
// the brackets and does not match `.`.
TEST_F(MatchFilePathTest, Brackets) {
145+
EXPECT_TRUE(match("[", "[[]"));
146+
EXPECT_TRUE(match("&", "[a&&b]"));
147+
EXPECT_TRUE(match("|", "[a||b]"));
148+
EXPECT_TRUE(match("~", "[a~~b]"));
149+
EXPECT_TRUE(match(",", "[a-z+--A-Z]"));
150+
EXPECT_FALSE(match(".", "[a-z--/A-Z]"));
151+
}
152+
153+
// Path-oriented cases: `*` and `?` do not match across `/`, a pattern must
// account for every path component, `\/` is accepted as an escaped separator,
// and a star followed by a lone trailing backslash never matches.
TEST_F(MatchFilePathTest, Path) {
154+
EXPECT_TRUE(match(".clang-format", "*"));
155+
EXPECT_TRUE(match(".git", "*git*"));
156+
EXPECT_TRUE(match(".gitignore", "*git*"));
157+
EXPECT_TRUE(match("foo/bar", "foo*/*bar"));
158+
EXPECT_TRUE(match("foo/bar", "*/*"));
159+
EXPECT_TRUE(match("foo/bar", R"(*foo*\/*bar*)"));
160+
EXPECT_FALSE(match("foo/bar", "foo*"));
161+
EXPECT_FALSE(match("foo/bar", "foo?bar"));
162+
EXPECT_FALSE(match("foo/bar", "foo*bar"));
163+
EXPECT_FALSE(match("foobar", "foo*/*"));
164+
EXPECT_FALSE(match("foo\\", R"(foo*\)"));
165+
}
166+
167+
} // namespace
168+
} // namespace format
169+
} // namespace clang

0 commit comments

Comments
 (0)