Skip to content

Commit abe4145

Browse files
authored
Merge pull request #38175 from al45tair/faster-demangle
[Demangler] Make swift-demangle faster by not using regex.
2 parents deccb33 + 60772eb commit abe4145

File tree

1 file changed

+103
-10
lines changed

1 file changed

+103
-10
lines changed

tools/swift-demangle/swift-demangle.cpp

Lines changed: 103 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
#include "llvm/Support/CommandLine.h"
2121
#include "llvm/Support/MemoryBuffer.h"
2222
#include "llvm/Support/PrettyStackTrace.h"
23-
#include "llvm/Support/Regex.h"
2423
#include "llvm/Support/Signals.h"
2524
#include "llvm/Support/raw_ostream.h"
2625

@@ -224,20 +223,114 @@ static void demangle(llvm::raw_ostream &os, llvm::StringRef name,
224223
DCtx.clear();
225224
}
226225

227-
static int demangleSTDIN(const swift::Demangle::DemangleOptions &options) {
228-
// This doesn't handle Unicode symbols, but maybe that's okay.
229-
// Also accept the future mangling prefix.
230-
llvm::Regex maybeSymbol("(_T|_?\\$[Ss])[_a-zA-Z0-9$.]+");
226+
/// Returns true if \p ch is one of the characters accepted in the body of a
/// candidate mangled symbol: '_', '$', '.', an ASCII letter or an ASCII digit.
///
/// Only ASCII is recognised; any other byte value is rejected.
static bool isValidInMangling(char ch) {
  return (ch == '_' || ch == '$' || ch == '.'
          || (ch >= 'a' && ch <= 'z')
          || (ch >= 'A' && ch <= 'Z')
          || (ch >= '0' && ch <= '9'));
}
232+
233+
/// Scans \p input for the first substring that looks like a mangled symbol.
///
/// A candidate is one of the prefixes `_T`, `$S`, `$s`, `_$S` or `_$s`
/// followed by at least one character accepted by isValidInMangling(), and it
/// extends over the longest run of such characters. This is a hand-written
/// matcher for the pattern `(_T|_?\$[Ss])[_a-zA-Z0-9$.]+`.
///
/// \param input  the text to search.
/// \param match  on success, set to the matching substring; it points into
///               the storage of \p input. Left untouched on failure.
/// \returns true if a candidate was found, false otherwise.
static bool findMaybeMangled(llvm::StringRef input, llvm::StringRef &match) {
  const char *ptr = input.data();
  size_t len = input.size();
  const char *end = ptr + len;
  enum {
    Start,          // Looking for the first '_' or '$'.
    SeenUnderscore, // Previous character was '_'; want 'T' or '$'.
    SeenDollar,     // Previous character was '$'; want 'S' or 's'.
    FoundPrefix     // A complete prefix was consumed; want the symbol body.
  } state = Start;
  // First character of the prefix currently being considered.
  const char *matchStart = nullptr;

  // Find _T, $S, $s, _$S, _$s followed by a valid mangled string
  //
  // Note on control flow: inside each inner `while`, `break` leaves only that
  // inner loop; the `break` that follows it leaves the `switch`, after which
  // the outer loop re-dispatches on the (possibly updated) state.
  while (ptr < end) {
    switch (state) {
    case Start:
      while (ptr < end) {
        char ch = *ptr++;

        if (ch == '_') {
          state = SeenUnderscore;
          matchStart = ptr - 1;
          break;
        } else if (ch == '$') {
          state = SeenDollar;
          matchStart = ptr - 1;
          break;
        }
      }
      break;

    case SeenUnderscore:
      while (ptr < end) {
        char ch = *ptr++;

        if (ch == 'T') {
          state = FoundPrefix;
          break;
        } else if (ch == '$') {
          // Keep matchStart on the '_' so that "_$S"/"_$s" include it.
          state = SeenDollar;
          break;
        } else if (ch == '_') {
          // Run of underscores: only the last one can start a prefix.
          matchStart = ptr - 1;
        } else {
          state = Start;
          break;
        }
      }
      break;

    case SeenDollar:
      while (ptr < end) {
        char ch = *ptr++;

        if (ch == 'S' || ch == 's') {
          state = FoundPrefix;
          break;
        } else if (ch == '_') {
          // "$_" cannot be a prefix, but the '_' may begin a new one.
          state = SeenUnderscore;
          matchStart = ptr - 1;
          break;
        } else if (ch == '$') {
          // Run of dollars: only the last one can start a prefix.
          matchStart = ptr - 1;
        } else {
          state = Start;
          break;
        }
      }
      break;

    case FoundPrefix:
      {
        const char *mangled = ptr;

        while (ptr < end && isValidInMangling(*ptr))
          ++ptr;

        // A prefix with no body is not a match; resume scanning from here.
        if (ptr == mangled) {
          state = Start;
          break;
        }

        match = llvm::StringRef(matchStart, ptr - matchStart);
        return true;
      }
    }
  }

  return false;
}
323+
324+
static int demangleSTDIN(const swift::Demangle::DemangleOptions &options) {
232325
swift::Demangle::Context DCtx;
233326
for (std::string mangled; std::getline(std::cin, mangled);) {
234327
llvm::StringRef inputContents(mangled);
328+
llvm::StringRef match;
235329

236-
llvm::SmallVector<llvm::StringRef, 1> matches;
237-
while (maybeSymbol.match(inputContents, &matches)) {
238-
llvm::outs() << substrBefore(inputContents, matches.front());
239-
demangle(llvm::outs(), matches.front(), DCtx, options);
240-
inputContents = substrAfter(inputContents, matches.front());
330+
while (findMaybeMangled(inputContents, match)) {
331+
llvm::outs() << substrBefore(inputContents, match);
332+
demangle(llvm::outs(), match, DCtx, options);
333+
inputContents = substrAfter(inputContents, match);
241334
}
242335

243336
llvm::outs() << inputContents << '\n';

0 commit comments

Comments
 (0)