Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 6568fb5

Browse files
author
Zachary Turner
committed
[MS Demangler] Fix several crashes and demangling bugs.
These bugs were found by writing a Python script which spidered the entire Chromium build directory tree, demangling every symbol in every object file.

At the start, the tool printed:

  Processed 27443 object files.
  2926377/2936108 symbols successfully demangled (99.6686%)
  9731 symbols could not be demangled (0.3314%)
  14589 files crashed while demangling (53.1611%)

After this patch, it prints:

  Processed 27443 object files.
  41295518/41295617 symbols successfully demangled (99.9998%)
  99 symbols could not be demangled (0.0002%)
  0 files crashed while demangling (0.0000%)

The issues fixed in this patch are:

* Ignore empty parameter packs. Previously we would encounter a mangling for an empty parameter pack and add a null node to the AST. Since we don't print these anyway, we now just don't add anything to the AST and ignore it entirely. This fixes some of the crashes.

* Account for "incorrect" string literal demanglings. Apparently an older version of clang would not truncate mangled string literals to 32 bytes of encoded character data. The demangling code however would allocate a 32 byte buffer thinking that it would not encounter more than this, and overrun the buffer. We now demangle up to 128 bytes of data, since the buggy clang would encode up to 32 *characters* of data.

* Extended support for demangling init-fini stubs. If you had something like struct Foo { static vector<string> S; }; this would generate a dynamic atexit initializer *for the variable*. We didn't handle this, but now we print something nice. This is actually an improvement over undname, which will fail to demangle this at all.

* Fixed one case of static this adjustment. We weren't handling several thunk codes so we didn't recognize the mangling. These are now handled.

* Fixed a back-referencing problem. Member pointer templates should have their components considered for back-referencing.

The remaining 99 symbols which can't be demangled are all symbols which are compiler-generated and undname can't demangle either.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@341000 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 47dc019 commit 6568fb5

File tree

6 files changed

+82
-28
lines changed

6 files changed

+82
-28
lines changed

lib/Demangle/MicrosoftDemangle.cpp

Lines changed: 52 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -350,8 +350,8 @@ class Demangler {
350350
VariableSymbolNode *
351351
demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
352352
StringView &MangledName);
353-
FunctionSymbolNode *demangleDynamicStructorFunction(StringView &MangledName,
354-
bool IsDestructor);
353+
FunctionSymbolNode *demangleInitFiniStub(StringView &MangledName,
354+
bool IsDestructor);
355355

356356
NamedIdentifierNode *demangleSimpleName(StringView &MangledName,
357357
bool Memorize);
@@ -520,16 +520,35 @@ Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
520520
return VSN;
521521
}
522522

523-
FunctionSymbolNode *
524-
Demangler::demangleDynamicStructorFunction(StringView &MangledName,
525-
bool IsDestructor) {
523+
FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
524+
bool IsDestructor) {
526525
DynamicStructorIdentifierNode *DSIN =
527526
Arena.alloc<DynamicStructorIdentifierNode>();
528527
DSIN->IsDestructor = IsDestructor;
529-
DSIN->Name = demangleFullyQualifiedTypeName(MangledName);
530-
QualifiedNameNode *QNN = synthesizeQualifiedName(Arena, DSIN);
531-
FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
532-
FSN->Name = QNN;
528+
529+
// What follows is a main symbol name. This may include namespaces or class
530+
// back references.
531+
QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
532+
533+
SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
534+
FunctionSymbolNode *FSN = nullptr;
535+
Symbol->Name = QN;
536+
537+
if (Symbol->kind() == NodeKind::VariableSymbol) {
538+
DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
539+
if (!MangledName.consumeFront('@')) {
540+
Error = true;
541+
return nullptr;
542+
}
543+
544+
FSN = demangleFunctionEncoding(MangledName);
545+
FSN->Name = synthesizeQualifiedName(Arena, DSIN);
546+
} else {
547+
FSN = static_cast<FunctionSymbolNode *>(Symbol);
548+
DSIN->Name = Symbol->Name;
549+
FSN->Name = synthesizeQualifiedName(Arena, DSIN);
550+
}
551+
533552
return FSN;
534553
}
535554

@@ -569,9 +588,9 @@ SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
569588
case SpecialIntrinsicKind::RttiBaseClassDescriptor:
570589
return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
571590
case SpecialIntrinsicKind::DynamicInitializer:
572-
return demangleDynamicStructorFunction(MangledName, false);
591+
return demangleInitFiniStub(MangledName, false);
573592
case SpecialIntrinsicKind::DynamicAtexitDestructor:
574-
return demangleDynamicStructorFunction(MangledName, true);
593+
return demangleInitFiniStub(MangledName, true);
575594
default:
576595
break;
577596
}
@@ -837,6 +856,8 @@ SymbolNode *Demangler::parse(StringView &MangledName) {
837856
// What follows is a main symbol name. This may include namespaces or class
838857
// back references.
839858
QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
859+
if (Error)
860+
return nullptr;
840861

841862
SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
842863
if (Symbol) {
@@ -1325,10 +1346,9 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
13251346
goto StringLiteralError;
13261347
}
13271348
} else {
1328-
if (StringByteSize > 32)
1329-
Result->IsTruncated = true;
1330-
1331-
constexpr unsigned MaxStringByteLength = 32;
1349+
// The max byte length is actually 32, but some compilers mangled strings
1350+
// incorrectly, so we have to assume it can go higher.
1351+
constexpr unsigned MaxStringByteLength = 32 * 4;
13321352
uint8_t StringBytes[MaxStringByteLength];
13331353

13341354
unsigned BytesDecoded = 0;
@@ -1337,6 +1357,9 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
13371357
StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
13381358
}
13391359

1360+
if (StringByteSize > BytesDecoded)
1361+
Result->IsTruncated = true;
1362+
13401363
unsigned CharBytes =
13411364
guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
13421365
assert(StringByteSize % CharBytes == 0);
@@ -1587,6 +1610,10 @@ FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
15871610
return FuncClass(FC_Private | FC_Virtual);
15881611
case 'F':
15891612
return FuncClass(FC_Private | FC_Virtual);
1613+
case 'G':
1614+
return FuncClass(FC_Private | FC_StaticThisAdjust);
1615+
case 'H':
1616+
return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
15901617
case 'I':
15911618
return FuncClass(FC_Protected);
15921619
case 'J':
@@ -1760,7 +1787,6 @@ TypeNode *Demangler::demangleType(StringView &MangledName,
17601787
Ty = demangleCustomType(MangledName);
17611788
} else {
17621789
Ty = demanglePrimitiveType(MangledName);
1763-
assert(Ty && !Error);
17641790
if (!Ty || Error)
17651791
return Ty;
17661792
}
@@ -1976,14 +2002,14 @@ PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
19762002
Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
19772003

19782004
if (MangledName.consumeFront("8")) {
1979-
Pointer->ClassParent = demangleFullyQualifiedSymbolName(MangledName);
2005+
Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
19802006
Pointer->Pointee = demangleFunctionType(MangledName, true);
19812007
} else {
19822008
Qualifiers PointeeQuals = Q_None;
19832009
bool IsMember = false;
19842010
std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
19852011
assert(IsMember);
1986-
Pointer->ClassParent = demangleFullyQualifiedSymbolName(MangledName);
2012+
Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
19872013

19882014
Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
19892015
Pointer->Pointee->Quals = PointeeQuals;
@@ -2121,18 +2147,21 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
21212147
size_t Count = 0;
21222148

21232149
while (!Error && !MangledName.startsWith('@')) {
2150+
if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
2151+
MangledName.consumeFront("$$$V")) {
2152+
// Empty parameter pack.
2153+
continue;
2154+
}
2155+
21242156
++Count;
2157+
21252158
// Template parameter lists don't participate in back-referencing.
21262159
*Current = Arena.alloc<NodeList>();
21272160

21282161
NodeList &TP = **Current;
21292162

21302163
TemplateParameterReferenceNode *TPRN = nullptr;
2131-
if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
2132-
MangledName.consumeFront("$$$V")) {
2133-
// Empty parameter pack.
2134-
TP.N = nullptr;
2135-
} else if (MangledName.consumeFront("$$Y")) {
2164+
if (MangledName.consumeFront("$$Y")) {
21362165
// Template alias
21372166
TP.N = demangleFullyQualifiedTypeName(MangledName);
21382167
} else if (MangledName.consumeFront("$$B")) {

lib/Demangle/MicrosoftDemangleNodes.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,15 @@ void DynamicStructorIdentifierNode::output(OutputStream &OS,
223223
else
224224
OS << "`dynamic initializer for ";
225225

226-
OS << "'";
227-
Name->output(OS, Flags);
228-
OS << "''";
226+
if (Variable) {
227+
OS << "`";
228+
Variable->output(OS, Flags);
229+
OS << "''";
230+
} else {
231+
OS << "'";
232+
Name->output(OS, Flags);
233+
OS << "''";
234+
}
229235
}
230236

231237
void NamedIdentifierNode::output(OutputStream &OS, OutputFlags Flags) const {

lib/Demangle/MicrosoftDemangleNodes.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ enum class NodeKind {
322322
LocalStaticGuardVariable,
323323
FunctionSymbol,
324324
VariableSymbol,
325-
SpecialTableSymbol,
325+
SpecialTableSymbol
326326
};
327327

328328
struct Node {
@@ -443,6 +443,7 @@ struct DynamicStructorIdentifierNode : public IdentifierNode {
443443

444444
void output(OutputStream &OS, OutputFlags Flags) const override;
445445

446+
VariableSymbolNode *Variable = nullptr;
446447
QualifiedNameNode *Name = nullptr;
447448
bool IsDestructor = false;
448449
};

test/Demangle/ms-back-references.test

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,3 +169,6 @@
169169

170170
?AddEmitPasses@EmitAssemblyHelper@?A0x43583946@@AEAA_NAEAVPassManager@legacy@llvm@@W4BackendAction@clang@@AEAVraw_pwrite_stream@5@PEAV85@@Z
171171
; CHECK: bool __cdecl `anonymous namespace'::EmitAssemblyHelper::AddEmitPasses(class llvm::legacy::PassManager &, enum clang::BackendAction, class llvm::raw_pwrite_stream &, class llvm::raw_pwrite_stream *)
172+
173+
??$forward@P8?$DecoderStream@$01@media@@AEXXZ@std@@YA$$QAP8?$DecoderStream@$01@media@@AEXXZAAP812@AEXXZ@Z
174+
; CHECK: void (__thiscall media::DecoderStream<2>::*&& __cdecl std::forward<void (__thiscall media::DecoderStream<2>::*)(void)>(void (__thiscall media::DecoderStream<2>::*&)(void)))(void)

test/Demangle/ms-operators.test

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@
161161
??_EBase@@UEAAPEAXI@Z
162162
; CHECK: virtual void * __cdecl Base::`vector deleting dtor'(unsigned int)
163163

164+
??_EBase@@G3AEPAXI@Z
165+
; CHECK: [thunk]: void * __thiscall Base::`vector deleting dtor'`adjustor{4}'(unsigned int)
166+
164167
??_F?$SomeTemplate@H@@QAEXXZ
165168
; CHECK: void __thiscall SomeTemplate<int>::`default ctor closure'(void)
166169

@@ -224,6 +227,9 @@
224227
??__FFoo@@YAXXZ
225228
; CHECK: void __cdecl `dynamic atexit destructor for 'Foo''(void)
226229

230+
??__F_decisionToDFA@XPathLexer@@0V?$vector@VDFA@dfa@antlr4@@V?$allocator@VDFA@dfa@antlr4@@@std@@@std@@A@YAXXZ
231+
; CHECK: void __cdecl `dynamic atexit destructor for `static class std::vector<class antlr4::dfa::DFA, class std::allocator<class antlr4::dfa::DFA>> XPathLexer::_decisionToDFA''(void)
232+
227233
??__K_deg@@YAHO@Z
228234
; CHECK: int __cdecl operator ""_deg(long double)
229235

test/Demangle/ms-string-literals.test

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -761,4 +761,13 @@
761761
; CHECK: const char16_t * {u"012345678901234"}
762762

763763
??_C@_0CA@KFPHPCC@0?$AA?$AA?$AA1?$AA?$AA?$AA2?$AA?$AA?$AA3?$AA?$AA?$AA4?$AA?$AA?$AA5?$AA?$AA?$AA6?$AA?$AA?$AA?$AA?$AA?$AA?$AA@
764-
; CHECK: const char32_t * {U"0123456"}
764+
; CHECK: const char32_t * {U"0123456"}
765+
766+
; There are too many bytes encoded in this string literal (it should encode a max of 32 bytes)
767+
; but some buggy compilers will incorrectly generate this, so we need to be able to demangle
768+
; both the correct and incorrect versions.
769+
??_C@_0CG@HJGBPLNO@l?$AAo?$AAo?$AAk?$AAA?$AAh?$AAe?$AAa?$AAd?$AAH?$AAa?$AAr?$AAd?$AAB?$AAr?$AAe?$AAa?$AAk?$AA?$AA?$AA@
770+
; CHECK: const char16_t * {u"lookAheadHardBreak"}
771+
772+
??_C@_0CG@HJGBPLNO@l?$AAo?$AAo?$AAk?$AAA?$AAh?$AAe?$AAa?$AAd?$AAH?$AAa?$AAr?$AAd?$AAB?$AAr?$AAe?$AA@
773+
; CHECK: const char16_t * {u"lookAheadHardBre"...}

0 commit comments

Comments
 (0)