-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[llvm-debuginfo-analyzer] Add support for parsing DWARF / CodeView SourceLanguage #137223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[llvm-debuginfo-analyzer] Add support for parsing DWARF / CodeView SourceLanguage #137223
Conversation
@llvm/pr-subscribers-llvm-binary-utilities @llvm/pr-subscribers-debuginfo Author: Javier Lopez-Gomez (jalopezg-git) Changes: This pull request adds support for parsing the source language in both DWARF and CodeView. Specifically,
FYI, @CarlosAlbertoEnciso. I believe this patch is ready too; feel free to start reviewing it. Full diff: https://github.com/llvm/llvm-project/pull/137223.diff 8 Files Affected:
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
index 17fa04040ad77..22c24d0c0592c 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
@@ -14,10 +14,13 @@
#ifndef LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVELEMENT_H
#define LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVELEMENT_H
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/LogicalView/Core/LVObject.h"
#include "llvm/Support/Casting.h"
#include <map>
#include <set>
+#include <variant>
#include <vector>
namespace llvm {
@@ -64,6 +67,22 @@ using LVElementKindSet = std::set<LVElementKind>;
using LVElementDispatch = std::map<LVElementKind, LVElementGetFunction>;
using LVElementRequest = std::vector<LVElementGetFunction>;
+/// A source language supported by any of the debug info representations.
+///
+/// Stores either a DWARF or a CodeView source language enumerator, or no
+/// language at all when default-constructed.
+struct LVSourceLanguage {
+  /// Creates an instance holding no language; isValid() returns false.
+  LVSourceLanguage() = default;
+  /// Wraps a DWARF source language.
+  LVSourceLanguage(llvm::dwarf::SourceLanguage SL) : Language(SL) {}
+  /// Wraps a CodeView source language.
+  LVSourceLanguage(llvm::codeview::SourceLanguage SL) : Language(SL) {}
+
+  /// Returns true when a DWARF or CodeView language is stored.
+  bool isValid() const {
+    return !std::holds_alternative<std::monostate>(Language);
+  }
+
+  /// Returns the stored language as \p T. \p T must be the alternative that
+  /// is currently stored (std::get is used for the access).
+  /// Declared const because it only reads the variant, so it is usable on
+  /// const objects as well.
+  template <typename T> T getAs() const { return std::get<T>(Language); }
+
+  /// Returns the printable name of the stored language (defined out of line).
+  StringRef getName() const;
+
+private:
+  // The std::monostate alternative (index 0) means "no language set".
+  std::variant<std::monostate, llvm::dwarf::SourceLanguage,
+               llvm::codeview::SourceLanguage>
+      Language;
+};
+
class LVElement : public LVObject {
enum class Property {
IsLine, // A logical line.
@@ -214,6 +233,9 @@ class LVElement : public LVObject {
virtual StringRef getProducer() const { return StringRef(); }
virtual void setProducer(StringRef ProducerName) {}
+ virtual LVSourceLanguage getSourceLanguage() const { return {}; }
+ virtual void setSourceLanguage(LVSourceLanguage SL) {}
+
virtual bool isCompileUnit() const { return false; }
virtual bool isRoot() const { return false; }
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
index 1b3c377cd7dbb..378f249029730 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
@@ -407,6 +407,9 @@ class LVScopeCompileUnit final : public LVScope {
// Toolchain producer.
size_t ProducerIndex = 0;
+ // Source language.
+ LVSourceLanguage SourceLanguage{};
+
// Compilation directory name.
size_t CompilationDirectoryIndex = 0;
@@ -540,6 +543,9 @@ class LVScopeCompileUnit final : public LVScope {
ProducerIndex = getStringPool().getIndex(ProducerName);
}
+ LVSourceLanguage getSourceLanguage() const override { return SourceLanguage; }
+ void setSourceLanguage(LVSourceLanguage SL) override { SourceLanguage = SL; }
+
void setCPUType(codeview::CPUType Type) { CompilationCPUType = Type; }
codeview::CPUType getCPUType() { return CompilationCPUType; }
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
index c6fb405baed1d..47aca07b7327e 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/LogicalView/Core/LVElement.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/LogicalView/Core/LVReader.h"
#include "llvm/DebugInfo/LogicalView/Core/LVScope.h"
#include "llvm/DebugInfo/LogicalView/Core/LVType.h"
@@ -19,6 +20,21 @@ using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::logicalview;
+/// Returns the printable name of the stored source language, or an empty
+/// StringRef when no language has been set.
+StringRef LVSourceLanguage::getName() const {
+  if (!isValid())
+    return {};
+  switch (Language.index()) {
+  case 1: // DWARF
+    return llvm::dwarf::LanguageString(
+        std::get<llvm::dwarf::SourceLanguage>(Language));
+  case 2: { // CodeView
+    // Look the enumerator up by value rather than using it as an array index:
+    // the name table is a list of (name, value) entries and is not guaranteed
+    // to be dense or ordered by enumerator value, so indexing could select
+    // the wrong entry or read past the end of the table.
+    const auto CVLanguage = std::get<llvm::codeview::SourceLanguage>(Language);
+    for (const auto &Entry : llvm::codeview::getSourceLanguageNames())
+      if (Entry.Value == CVLanguage)
+        return Entry.Name;
+    // An enumerator with no table entry yields an empty name.
+    return {};
+  }
+  default:
+    llvm_unreachable("Unsupported language");
+  }
+}
+
#define DEBUG_TYPE "Element"
LVElementDispatch LVElement::Dispatch = {
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
index 8bbaf93db0caa..ae585567c9de9 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
@@ -1707,11 +1707,17 @@ void LVScopeCompileUnit::print(raw_ostream &OS, bool Full) const {
void LVScopeCompileUnit::printExtra(raw_ostream &OS, bool Full) const {
OS << formattedKind(kind()) << " '" << getName() << "'\n";
- if (options().getPrintFormatting() && options().getAttributeProducer())
+ if (options().getPrintFormatting() && options().getAttributeProducer()) {
printAttributes(OS, Full, "{Producer} ",
const_cast<LVScopeCompileUnit *>(this), getProducer(),
/*UseQuotes=*/true,
/*PrintRef=*/false);
+ if (auto SL = getSourceLanguage(); SL.isValid())
+ printAttributes(OS, Full, "{Language} ",
+ const_cast<LVScopeCompileUnit *>(this), SL.getName(),
+ /*UseQuotes=*/true,
+ /*PrintRef=*/false);
+ }
// Reset file index, to allow its children to print the correct filename.
options().resetFilenameIndex();
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
index 97214948d014a..3359cb8751923 100644
--- a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
@@ -947,8 +947,11 @@ Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
// The name of the CU, was extracted from the 'BuildInfo' subsection.
Reader->setCompileUnitCPUType(Compile2.Machine);
Scope->setName(CurrentObjectName);
- if (options().getAttributeProducer())
+ if (options().getAttributeProducer()) {
Scope->setProducer(Compile2.Version);
+ Scope->setSourceLanguage(LVSourceLanguage{
+ static_cast<llvm::codeview::SourceLanguage>(Compile2.getLanguage())});
+ }
getReader().isSystemEntry(Scope, CurrentObjectName);
// The line records in CodeView are recorded per Module ID. Update
@@ -992,8 +995,11 @@ Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
// The name of the CU, was extracted from the 'BuildInfo' subsection.
Reader->setCompileUnitCPUType(Compile3.Machine);
Scope->setName(CurrentObjectName);
- if (options().getAttributeProducer())
+ if (options().getAttributeProducer()) {
Scope->setProducer(Compile3.Version);
+ Scope->setSourceLanguage(LVSourceLanguage{
+ static_cast<llvm::codeview::SourceLanguage>(Compile3.getLanguage())});
+ }
getReader().isSystemEntry(Scope, CurrentObjectName);
// The line records in CodeView are recorded per Module ID. Update
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp
index 42da957233667..e5bdd1ba614d6 100644
--- a/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp
@@ -383,6 +383,11 @@ void LVDWARFReader::processOneAttribute(const DWARFDie &Die,
if (options().getAttributeProducer())
CurrentElement->setProducer(dwarf::toStringRef(FormValue));
break;
+ case dwarf::DW_AT_language:
+ if (options().getAttributeProducer())
+ CurrentElement->setSourceLanguage(LVSourceLanguage{
+ static_cast<llvm::dwarf::SourceLanguage>(GetAsUnsignedConstant())});
+ break;
case dwarf::DW_AT_upper_bound:
CurrentElement->setUpperBound(GetBoundValue(FormValue));
break;
diff --git a/llvm/unittests/DebugInfo/LogicalView/CodeViewReaderTest.cpp b/llvm/unittests/DebugInfo/LogicalView/CodeViewReaderTest.cpp
index c93a79094dce9..c6bdda782a17f 100644
--- a/llvm/unittests/DebugInfo/LogicalView/CodeViewReaderTest.cpp
+++ b/llvm/unittests/DebugInfo/LogicalView/CodeViewReaderTest.cpp
@@ -78,6 +78,11 @@ void checkElementPropertiesClangCodeview(LVReader *Reader) {
EXPECT_EQ(CompileUnit->getBaseAddress(), 0u);
EXPECT_TRUE(CompileUnit->getProducer().starts_with("clang"));
EXPECT_EQ(CompileUnit->getName(), "test.cpp");
+ LVSourceLanguage Language = CompileUnit->getSourceLanguage();
+ EXPECT_TRUE(Language.isValid());
+ ASSERT_EQ(Language.getAs<llvm::codeview::SourceLanguage>(),
+ llvm::codeview::SourceLanguage::Cpp);
+ ASSERT_EQ(Language.getName(), "Cpp");
EXPECT_EQ(Function->lineCount(), 16u);
EXPECT_EQ(Function->scopeCount(), 1u);
diff --git a/llvm/unittests/DebugInfo/LogicalView/DWARFReaderTest.cpp b/llvm/unittests/DebugInfo/LogicalView/DWARFReaderTest.cpp
index c062c15481da9..72d4bc1c7b7e5 100644
--- a/llvm/unittests/DebugInfo/LogicalView/DWARFReaderTest.cpp
+++ b/llvm/unittests/DebugInfo/LogicalView/DWARFReaderTest.cpp
@@ -72,6 +72,11 @@ void checkElementProperties(LVReader *Reader) {
EXPECT_EQ(CompileUnit->getBaseAddress(), 0u);
EXPECT_TRUE(CompileUnit->getProducer().starts_with("clang"));
EXPECT_EQ(CompileUnit->getName(), "test.cpp");
+ LVSourceLanguage Language = CompileUnit->getSourceLanguage();
+ EXPECT_TRUE(Language.isValid());
+ EXPECT_EQ(Language.getAs<llvm::dwarf::SourceLanguage>(),
+ llvm::dwarf::DW_LANG_C_plus_plus_14);
+ EXPECT_EQ(Language.getName(), "DW_LANG_C_plus_plus_14");
EXPECT_EQ(CompileUnit->lineCount(), 0u);
EXPECT_EQ(CompileUnit->scopeCount(), 1u);
|
af226a0
to
e75238a
Compare
5dfcb95
to
2e59c39
Compare
@jalopezg-git Thanks for doing this. What I would suggest is to include the command line option may be |
Thanks for the review, @CarlosAlbertoEnciso 👍! See my concern above about going for |
5e54a5c
to
b669e4f
Compare
Done too 👍; only missing discussion on whether to transition to |
✅ With the latest revision this PR passed the C/C++ code formatter. |
5ab70b8
to
ecbb30e
Compare
b86c309
to
22e90ef
Compare
424d87f
to
b215afe
Compare
b215afe
to
a9fc9e8
Compare
Also, as a general comment, the // Used by the CodeView Reader.
codeview::CPUType CompilationCPUType = codeview::CPUType::X64; (a |
a9fc9e8
to
b6ab357
Compare
…deViewLanguages.def (NFC) (#141750) This PR proposes moving out enumerators for `codeview::SourceLanguage` to a separate CodeViewLanguages.def file, following the same guideline that in other parts of LLVM, and in particular the `TypeRecordKind` (enumerators in CodeViewTypes.def) or `SymbolRecordKind` (enumerators in CodeViewSymbols.def). This is a non-functional change, and has been labeled as such. This change helps for #137223, and possibly other future changes.
b6ab357
to
4841af8
Compare
…ators to CodeViewLanguages.def (NFC) (#141750) This PR proposes moving out enumerators for `codeview::SourceLanguage` to a separate CodeViewLanguages.def file, following the same guideline that in other parts of LLVM, and in particular the `TypeRecordKind` (enumerators in CodeViewTypes.def) or `SymbolRecordKind` (enumerators in CodeViewSymbols.def). This is a non-functional change, and has been labeled as such. This change helps for llvm/llvm-project#137223, and possibly other future changes.
b9e15e5
to
65b9513
Compare
65b9513
to
d745794
Compare
d745794
to
5ed803c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@jalopezg-git Have you solved the issues with the request for write permissions? If not, I am happy to push on your behalf. |
Nope, not yet; I suspect that @tstellar may be on vacation since, if I am not mistaken, no requests have been dispatched since 23rd May. Please, push it on my behalf at your earliest convenience 😉. |
…deViewLanguages.def (NFC) (llvm#141750) This PR proposes moving out enumerators for `codeview::SourceLanguage` to a separate CodeViewLanguages.def file, following the same guideline that in other parts of LLVM, and in particular the `TypeRecordKind` (enumerators in CodeViewTypes.def) or `SymbolRecordKind` (enumerators in CodeViewSymbols.def). This is a non-functional change, and has been labeled as such. This change helps for llvm#137223, and possibly other future changes.
…urceLanguage (llvm#137223) This pull request adds support for parsing the source language in both DWARF and CodeView. Specifically, - The `LVSourceLanguage` class is introduced to represent any supported language by any of the debug info representations. - Update `LVDWARFReader.cpp` and `LVCodeViewVisitor.cpp` to parse the source language where it applies. Added a new `=Language` attribute; `getAttributeLanguage()` is internally used to control whether this information is being printed.
…deViewLanguages.def (NFC) (llvm#141750) This PR proposes moving out enumerators for `codeview::SourceLanguage` to a separate CodeViewLanguages.def file, following the same guideline that in other parts of LLVM, and in particular the `TypeRecordKind` (enumerators in CodeViewTypes.def) or `SymbolRecordKind` (enumerators in CodeViewSymbols.def). This is a non-functional change, and has been labeled as such. This change helps for llvm#137223, and possibly other future changes.
…urceLanguage (llvm#137223) This pull request adds support for parsing the source language in both DWARF and CodeView. Specifically, - The `LVSourceLanguage` class is introduced to represent any supported language by any of the debug info representations. - Update `LVDWARFReader.cpp` and `LVCodeViewVisitor.cpp` to parse the source language where it applies. Added a new `=Language` attribute; `getAttributeLanguage()` is internally used to control whether this information is being printed.
…urceLanguage (llvm#137223) This pull request adds support for parsing the source language in both DWARF and CodeView. Specifically, - The `LVSourceLanguage` class is introduced to represent any supported language by any of the debug info representations. - Update `LVDWARFReader.cpp` and `LVCodeViewVisitor.cpp` to parse the source language where it applies. Added a new `=Language` attribute; `getAttributeLanguage()` is internally used to control whether this information is being printed.
This pull request adds support for parsing the source language in both DWARF and CodeView. Specifically,
- The `LVSourceLanguage` class is introduced to represent any supported language by any of the debug info representations.
- Update `LVDWARFReader.cpp` and `LVCodeViewVisitor.cpp` to parse the source language where it applies.

Added a new `=Language` attribute; `getAttributeLanguage()` is internally used to control whether this information is being printed.