|
17 | 17 |
|
18 | 18 | using namespace llvm;
|
19 | 19 |
|
20 |
| -static bool hasWildcard(StringRef S) { |
21 |
| - return S.find_first_of("?*[\\") != StringRef::npos; |
22 |
| -} |
23 |
| - |
24 | 20 | // Expands character ranges and returns a bitmap.
|
25 | 21 | // For example, "a-cf-hz" is expanded to "abcfghz".
|
26 | 22 | static Expected<BitVector> expand(StringRef S, StringRef Original) {
|
@@ -58,120 +54,95 @@ static Expected<BitVector> expand(StringRef S, StringRef Original) {
|
58 | 54 | return BV;
|
59 | 55 | }
|
60 | 56 |
|
61 |
| -// This is a scanner for the glob pattern. |
62 |
| -// A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]" |
63 |
| -// (which is a negative form of "[<chars>]"), "[!<chars>]" (which is |
64 |
| -// equivalent to "[^<chars>]"), or a non-meta character. |
65 |
| -// This function returns the first token in S. |
66 |
| -static Expected<BitVector> scan(StringRef &S, StringRef Original) { |
67 |
| - switch (S[0]) { |
68 |
| - case '*': |
69 |
| - S = S.substr(1); |
70 |
| - // '*' is represented by an empty bitvector. |
71 |
| - // All other bitvectors are 256-bit long. |
72 |
| - return BitVector(); |
73 |
| - case '?': |
74 |
| - S = S.substr(1); |
75 |
| - return BitVector(256, true); |
76 |
| - case '[': { |
77 |
| - // ']' is allowed as the first character of a character class. '[]' is |
78 |
| - // invalid. So, just skip the first character. |
79 |
| - size_t End = S.find(']', 2); |
80 |
| - if (End == StringRef::npos) |
81 |
| - return make_error<StringError>("invalid glob pattern: " + Original, |
82 |
| - errc::invalid_argument); |
83 |
| - |
84 |
| - StringRef Chars = S.substr(1, End - 1); |
85 |
| - S = S.substr(End + 1); |
86 |
| - if (Chars.startswith("^") || Chars.startswith("!")) { |
87 |
| - Expected<BitVector> BV = expand(Chars.substr(1), Original); |
88 |
| - if (!BV) |
89 |
| - return BV.takeError(); |
90 |
| - return BV->flip(); |
91 |
| - } |
92 |
| - return expand(Chars, Original); |
93 |
| - } |
94 |
| - case '\\': |
95 |
| - // Eat this character and fall through below to treat it like a non-meta |
96 |
| - // character. |
97 |
| - S = S.substr(1); |
98 |
| - [[fallthrough]]; |
99 |
| - default: |
100 |
| - BitVector BV(256, false); |
101 |
| - BV[(uint8_t)S[0]] = true; |
102 |
| - S = S.substr(1); |
103 |
| - return BV; |
104 |
| - } |
105 |
| -} |
106 |
| - |
107 | 57 | Expected<GlobPattern> GlobPattern::create(StringRef S) {
|
108 | 58 | GlobPattern Pat;
|
109 | 59 |
|
110 |
| - // S doesn't contain any metacharacter, |
111 |
| - // so the regular string comparison should work. |
112 |
| - if (!hasWildcard(S)) { |
113 |
| - Pat.Exact = S; |
114 |
| - return Pat; |
115 |
| - } |
116 |
| - |
117 |
| - // S is something like "foo*", and the "* is not escaped. We can use |
118 |
| - // startswith(). |
119 |
| - if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { |
120 |
| - Pat.Prefix = S.drop_back(); |
| 60 | + // Store the prefix that does not contain any metacharacter. |
| 61 | + size_t PrefixSize = S.find_first_of("?*[\\"); |
| 62 | + Pat.Prefix = S.substr(0, PrefixSize); |
| 63 | + if (PrefixSize == std::string::npos) |
121 | 64 | return Pat;
|
122 |
| - } |
123 |
| - |
124 |
| - // S is something like "*foo". We can use endswith(). |
125 |
| - if (S.startswith("*") && !hasWildcard(S.drop_front())) { |
126 |
| - Pat.Suffix = S.drop_front(); |
127 |
| - return Pat; |
128 |
| - } |
129 |
| - |
130 |
| - // Otherwise, we need to do real glob pattern matching. |
131 |
| - // Parse the pattern now. |
132 | 65 | StringRef Original = S;
|
133 |
| - while (!S.empty()) { |
134 |
| - Expected<BitVector> BV = scan(S, Original); |
135 |
| - if (!BV) |
136 |
| - return BV.takeError(); |
137 |
| - Pat.Tokens.push_back(*BV); |
| 66 | + S = S.substr(PrefixSize); |
| 67 | + |
| 68 | + // Parse brackets. |
| 69 | + Pat.Pat = S; |
| 70 | + for (size_t I = 0, E = S.size(); I != E; ++I) { |
| 71 | + if (S[I] == '[') { |
| 72 | + // ']' is allowed as the first character of a character class. '[]' is |
| 73 | + // invalid. So, just skip the first character. |
| 74 | + ++I; |
| 75 | + size_t J = S.find(']', I + 1); |
| 76 | + if (J == StringRef::npos) |
| 77 | + return make_error<StringError>("invalid glob pattern: " + Original, |
| 78 | + errc::invalid_argument); |
| 79 | + StringRef Chars = S.substr(I, J - I); |
| 80 | + bool Invert = S[I] == '^' || S[I] == '!'; |
| 81 | + Expected<BitVector> BV = |
| 82 | + Invert ? expand(Chars.substr(1), S) : expand(Chars, S); |
| 83 | + if (!BV) |
| 84 | + return BV.takeError(); |
| 85 | + if (Invert) |
| 86 | + BV->flip(); |
| 87 | + Pat.Brackets.push_back(Bracket{S.data() + J + 1, std::move(*BV)}); |
| 88 | + I = J; |
| 89 | + } else if (S[I] == '\\') { |
| 90 | + if (++I == E) |
| 91 | + return make_error<StringError>("invalid glob pattern, stray '\\'", |
| 92 | + errc::invalid_argument); |
| 93 | + } |
138 | 94 | }
|
139 | 95 | return Pat;
|
140 | 96 | }
|
141 | 97 |
|
142 | 98 | bool GlobPattern::match(StringRef S) const {
|
143 |
| - if (Exact) |
144 |
| - return S == *Exact; |
145 |
| - if (Prefix) |
146 |
| - return S.startswith(*Prefix); |
147 |
| - if (Suffix) |
148 |
| - return S.endswith(*Suffix); |
149 |
| - return matchOne(Tokens, S); |
| 99 | + return S.consume_front(Prefix) && matchOne(S); |
150 | 100 | }
|
151 | 101 |
|
152 |
| -// Runs glob pattern Pats against string S. |
153 |
| -bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const { |
154 |
| - for (;;) { |
155 |
| - if (Pats.empty()) |
156 |
| - return S.empty(); |
157 |
| - |
158 |
| - // If Pats[0] is '*', try to match Pats[1..] against all possible |
159 |
| - // tail strings of S to see at least one pattern succeeds. |
160 |
| - if (Pats[0].size() == 0) { |
161 |
| - Pats = Pats.slice(1); |
162 |
| - if (Pats.empty()) |
163 |
| - // Fast path. If a pattern is '*', it matches anything. |
164 |
| - return true; |
165 |
| - for (size_t I = 0, E = S.size(); I < E; ++I) |
166 |
| - if (matchOne(Pats, S.substr(I))) |
167 |
| - return true; |
168 |
| - return false; |
| 102 | +// Factor the pattern into segments split by '*'. The segment is matched |
| 103 | +// sequentially by finding the first occurrence past the end of the previous |
| 104 | +// match. |
| 105 | +bool GlobPattern::matchOne(StringRef Str) const { |
| 106 | + const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(), |
| 107 | + *SavedS = S; |
| 108 | + const char *const PEnd = P + Pat.size(), *const End = S + Str.size(); |
| 109 | + size_t B = 0, SavedB = 0; |
| 110 | + while (S != End) { |
| 111 | + if (P == PEnd) |
| 112 | + ; |
| 113 | + else if (*P == '*') { |
| 114 | + // The non-* substring on the left of '*' matches the tail of S. Save the |
| 115 | + // positions to be used by backtracking if we see a mismatch later. |
| 116 | + SegmentBegin = ++P; |
| 117 | + SavedS = S; |
| 118 | + SavedB = B; |
| 119 | + continue; |
| 120 | + } else if (*P == '[') { |
| 121 | + if (Brackets[B].Bytes[uint8_t(*S)]) { |
| 122 | + P = Brackets[B++].Next; |
| 123 | + ++S; |
| 124 | + continue; |
| 125 | + } |
| 126 | + } else if (*P == '\\') { |
| 127 | + if (*++P == *S) { |
| 128 | + ++P; |
| 129 | + ++S; |
| 130 | + continue; |
| 131 | + } |
| 132 | + } else if (*P == *S || *P == '?') { |
| 133 | + ++P; |
| 134 | + ++S; |
| 135 | + continue; |
169 | 136 | }
|
170 |
| - |
171 |
| - // If Pats[0] is not '*', it must consume one character. |
172 |
| - if (S.empty() || !Pats[0][(uint8_t)S[0]]) |
| 137 | + if (!SegmentBegin) |
173 | 138 | return false;
|
174 |
| - Pats = Pats.slice(1); |
175 |
| - S = S.substr(1); |
| 139 | + // We have seen a '*'. Backtrack to the saved positions. Shift the S |
| 140 | + // position to probe the next starting position in the segment. |
| 141 | + P = SegmentBegin; |
| 142 | + S = ++SavedS; |
| 143 | + B = SavedB; |
176 | 144 | }
|
| 145 | + // All bytes in Str have been matched. Return true if the rest of Pat is |
| 146 | + // empty or contains only '*'. |
| 147 | + return Pat.find_first_not_of('*', P - Pat.data()) == std::string::npos; |
177 | 148 | }
|
0 commit comments