Skip to content

Commit c7ef002

Browse files
authored
Fix performance bug in buildLocationList (llvm#109343)
In buildLocationList, with basic block sections, we iterate over every basic block twice to detect section start and end. This is sub-optimal and becomes significantly time-consuming when compiling large functions. This patch uses the set of sections already stored in MBBSectionRanges and iterates over sections rather than basic blocks. When detecting if loclists can be merged, the end label of an entry is matched with the beginning label of the next entry. For the section corresponding to the entry basic block, the end-label check is skipped. This is because the loc list uses the end label of the entry section itself (a basic block end label) whereas the MBBSectionRanges map uses the function end label. For example: .Lfunc_begin0: .file .loc 0 4 0 # ex2.cc:4:0 .cfi_startproc .Ltmp0: .loc 0 8 5 prologue_end # ex2.cc:8:5 .... .LBB_END0_0: .cfi_endproc .section .text._Z4testv,"ax",@progbits,unique,1 ... .Lfunc_end0: .size _Z4testv, .Lfunc_end0-_Z4testv The debug loc uses ".LBB_END0_0" for the end of the section whereas MBBSectionRanges uses ".Lfunc_end0". It is alright to skip this check as we already check the section corresponding to the debug loc entry. Added a new test case to check that this works correctly when the variable's value is mutated in the entry section.
1 parent 0d499f9 commit c7ef002

7 files changed

+501
-26
lines changed

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1774,6 +1774,12 @@ void AsmPrinter::emitFunctionBody() {
17741774
bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
17751775

17761776
bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1777+
// Create a slot for the entry basic block section so that the section
1778+
// order is preserved when iterating over MBBSectionRanges.
1779+
if (!MF->empty())
1780+
MBBSectionRanges[MF->front().getSectionID()] =
1781+
MBBSectionRange{CurrentFnBegin, nullptr};
1782+
17771783
for (auto &MBB : *MF) {
17781784
// Print a label for the basic block.
17791785
emitBasicBlockStart(MBB);
@@ -2052,11 +2058,8 @@ void AsmPrinter::emitFunctionBody() {
20522058
}
20532059
for (auto &Handler : Handlers)
20542060
Handler->markFunctionEnd();
2055-
2056-
assert(!MBBSectionRanges.contains(MF->front().getSectionID()) &&
2057-
"Overwrite section range");
2058-
MBBSectionRanges[MF->front().getSectionID()] =
2059-
MBBSectionRange{CurrentFnBegin, CurrentFnEnd};
2061+
// Update the end label of the entry block's section.
2062+
MBBSectionRanges[MF->front().getSectionID()].EndLabel = CurrentFnEnd;
20602063

20612064
// Print out jump tables referenced by the function.
20622065
emitJumpTableInfo();

llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
3535
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
3636
#include "llvm/IR/Constants.h"
37+
#include "llvm/IR/DebugInfoMetadata.h"
3738
#include "llvm/IR/Function.h"
3839
#include "llvm/IR/GlobalVariable.h"
3940
#include "llvm/IR/Module.h"
@@ -1776,18 +1777,14 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
17761777
// span each individual section in the range from StartLabel to EndLabel.
17771778
if (Asm->MF->hasBBSections() && StartLabel == Asm->getFunctionBegin() &&
17781779
!Instr->getParent()->sameSection(&Asm->MF->front())) {
1779-
const MCSymbol *BeginSectionLabel = StartLabel;
1780-
1781-
for (const MachineBasicBlock &MBB : *Asm->MF) {
1782-
if (MBB.isBeginSection() && &MBB != &Asm->MF->front())
1783-
BeginSectionLabel = MBB.getSymbol();
1784-
1785-
if (MBB.sameSection(Instr->getParent())) {
1786-
DebugLoc.emplace_back(BeginSectionLabel, EndLabel, Values);
1780+
for (const auto &[MBBSectionId, MBBSectionRange] :
1781+
Asm->MBBSectionRanges) {
1782+
if (Instr->getParent()->getSectionID() == MBBSectionId) {
1783+
DebugLoc.emplace_back(MBBSectionRange.BeginLabel, EndLabel, Values);
17871784
break;
17881785
}
1789-
if (MBB.isEndSection())
1790-
DebugLoc.emplace_back(BeginSectionLabel, MBB.getEndSymbol(), Values);
1786+
DebugLoc.emplace_back(MBBSectionRange.BeginLabel,
1787+
MBBSectionRange.EndLabel, Values);
17911788
}
17921789
} else {
17931790
DebugLoc.emplace_back(StartLabel, EndLabel, Values);
@@ -1828,22 +1825,27 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
18281825
RangeMBB = &Asm->MF->front();
18291826
else
18301827
RangeMBB = Entries.begin()->getInstr()->getParent();
1828+
auto RangeIt = Asm->MBBSectionRanges.find(RangeMBB->getSectionID());
1829+
assert(RangeIt != Asm->MBBSectionRanges.end() &&
1830+
"Range MBB not found in MBBSectionRanges!");
18311831
auto *CurEntry = DebugLoc.begin();
18321832
auto *NextEntry = std::next(CurEntry);
1833+
auto NextRangeIt = std::next(RangeIt);
18331834
while (NextEntry != DebugLoc.end()) {
1834-
// Get the last machine basic block of this section.
1835-
while (!RangeMBB->isEndSection())
1836-
RangeMBB = RangeMBB->getNextNode();
1837-
if (!RangeMBB->getNextNode())
1835+
if (NextRangeIt == Asm->MBBSectionRanges.end())
18381836
return false;
18391837
// CurEntry should end the current section and NextEntry should start
18401838
// the next section and the Values must match for these two ranges to be
1841-
// merged.
1842-
if (CurEntry->getEndSym() != RangeMBB->getEndSymbol() ||
1843-
NextEntry->getBeginSym() != RangeMBB->getNextNode()->getSymbol() ||
1839+
// merged. Do not match the section label end if it is the entry block
1840+
// section. This is because the end label for the Debug Loc and the
1841+
// Function end label could be different.
1842+
if ((RangeIt->second.EndLabel != Asm->getFunctionEnd() &&
1843+
CurEntry->getEndSym() != RangeIt->second.EndLabel) ||
1844+
NextEntry->getBeginSym() != NextRangeIt->second.BeginLabel ||
18441845
CurEntry->getValues() != NextEntry->getValues())
18451846
return false;
1846-
RangeMBB = RangeMBB->getNextNode();
1847+
RangeIt = NextRangeIt;
1848+
NextRangeIt = std::next(RangeIt);
18471849
CurEntry = NextEntry;
18481850
NextEntry = std::next(CurEntry);
18491851
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu --dwarf-version=4 --basic-block-sections=none -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s
2+
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu --dwarf-version=4 --basic-block-sections=all -filetype=obj -o - | llvm-dwarfdump - | FileCheck --check-prefix=SECTIONS %s
3+
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu --dwarf-version=5 --basic-block-sections=none -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s
4+
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu --dwarf-version=5 --basic-block-sections=all -filetype=obj -o - | llvm-dwarfdump - | FileCheck --check-prefix=SECTIONS %s
5+
6+
; CHECK: DW_TAG_variable
7+
; CHECK-NEXT: DW_AT_location
8+
; CHECK-NEXT: [0x{{[0-9a-f]+}}, 0x{{[0-9a-f]+}}): DW_OP_consts +7, DW_OP_stack_value
9+
; CHECK-NEXT: [0x{{[0-9a-f]+}}, 0x{{[0-9a-f]+}}): DW_OP_consts +8, DW_OP_stack_value
10+
; CHECK-NEXT: DW_AT_name ("i")
11+
12+
; SECTIONS: DW_TAG_variable
13+
; SECTIONS-NEXT: DW_AT_location
14+
; SECTIONS-NEXT: [0x{{[0-9a-f]+}}, 0x{{[0-9a-f]+}}): DW_OP_consts +7, DW_OP_stack_value
15+
; SECTIONS-NEXT: [0x{{[0-9a-f]+}}, 0x{{[0-9a-f]+}}): DW_OP_consts +8, DW_OP_stack_value
16+
; SECTIONS-NEXT: [0x{{[0-9a-f]+}}, 0x{{[0-9a-f]+}}): DW_OP_consts +8, DW_OP_stack_value
17+
; SECTIONS-NEXT: [0x{{[0-9a-f]+}}, 0x{{[0-9a-f]+}}): DW_OP_consts +8, DW_OP_stack_value
18+
; SECTIONS-NEXT: DW_AT_name ("i")
19+
20+
; Source to generate the IR below:
21+
; void f1();
22+
; extern bool b;
23+
; void test() {
24+
; // i is not a const throughout the whole scope and should
25+
; // not use DW_AT_const_value
26+
; int i = 7;
27+
; f1();
28+
; i = 8;
29+
; if (b)
30+
; f1();
31+
; }
32+
; $ clang++ -S loclist_section.cc -O2 -g -emit-llvm
33+
34+
@b = external local_unnamed_addr global i8, align 1
35+
36+
; Function Attrs: mustprogress uwtable
37+
define dso_local void @_Z4testv() local_unnamed_addr #0 !dbg !10 {
38+
entry:
39+
#dbg_value(i32 7, !14, !DIExpression(), !16)
40+
tail call void @_Z2f1v(), !dbg !17
41+
#dbg_value(i32 8, !14, !DIExpression(), !16)
42+
%0 = load i8, ptr @b, align 1, !dbg !18, !tbaa !20, !range !24, !noundef !25
43+
%loadedv = trunc nuw i8 %0 to i1, !dbg !18
44+
br i1 %loadedv, label %if.then, label %if.end, !dbg !26
45+
46+
if.then: ; preds = %entry
47+
tail call void @_Z2f1v(), !dbg !27
48+
br label %if.end, !dbg !27
49+
50+
if.end: ; preds = %if.then, %entry
51+
ret void, !dbg !28
52+
}
53+
54+
declare !dbg !29 void @_Z2f1v() local_unnamed_addr #1
55+
56+
attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
57+
attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
58+
59+
!llvm.dbg.cu = !{!0}
60+
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
61+
!llvm.ident = !{!9}
62+
63+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 20.0.0git ([email protected]:)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
64+
!1 = !DIFile(filename: "loclist_section.cc", directory: "Examples/debug_loc", checksumkind: CSK_MD5, checksum: "67769a94389681c8a6da481e2f358abb")
65+
!2 = !{i32 7, !"Dwarf Version", i32 5}
66+
!3 = !{i32 2, !"Debug Info Version", i32 3}
67+
!4 = !{i32 1, !"wchar_size", i32 4}
68+
!5 = !{i32 8, !"PIC Level", i32 2}
69+
!6 = !{i32 7, !"PIE Level", i32 2}
70+
!7 = !{i32 7, !"uwtable", i32 2}
71+
!8 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
72+
!9 = !{!"clang version 20.0.0git ([email protected]:.../llvm-project.git 7c3256280a78b0505ae4d43985c4d3239451a151)"}
73+
!10 = distinct !DISubprogram(name: "test", linkageName: "_Z4testv", scope: !1, file: !1, line: 3, type: !11, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13)
74+
!11 = !DISubroutineType(types: !12)
75+
!12 = !{null}
76+
!13 = !{!14}
77+
!14 = !DILocalVariable(name: "i", scope: !10, file: !1, line: 6, type: !15)
78+
!15 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
79+
!16 = !DILocation(line: 0, scope: !10)
80+
!17 = !DILocation(line: 7, column: 5, scope: !10)
81+
!18 = !DILocation(line: 9, column: 9, scope: !19)
82+
!19 = distinct !DILexicalBlock(scope: !10, file: !1, line: 9, column: 9)
83+
!20 = !{!21, !21, i64 0}
84+
!21 = !{!"bool", !22, i64 0}
85+
!22 = !{!"omnipotent char", !23, i64 0}
86+
!23 = !{!"Simple C++ TBAA"}
87+
!24 = !{i8 0, i8 2}
88+
!25 = !{}
89+
!26 = !DILocation(line: 9, column: 9, scope: !10)
90+
!27 = !DILocation(line: 10, column: 7, scope: !19)
91+
!28 = !DILocation(line: 11, column: 1, scope: !10)
92+
!29 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !11, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu --dwarf-version=4 --basic-block-sections=none -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s
2+
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu --dwarf-version=4 --basic-block-sections=all -filetype=obj -o - | llvm-dwarfdump - | FileCheck --check-prefix=SECTIONS %s
3+
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu --dwarf-version=5 --basic-block-sections=none -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s
4+
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu --dwarf-version=5 --basic-block-sections=all -filetype=obj -o - | llvm-dwarfdump - | FileCheck --check-prefix=SECTIONS %s
5+
6+
; CHECK: DW_TAG_lexical_block
7+
; CHECK-NEXT: DW_AT_low_pc
8+
; CHECK-NEXT: DW_AT_high_pc
9+
; CHECK: DW_TAG_variable
10+
; CHECK-NEXT: DW_AT_const_value (7)
11+
; CHECK-NEXT: DW_AT_name ("i")
12+
13+
; SECTIONS: DW_TAG_lexical_block
14+
; SECTIONS-NEXT: DW_AT_ranges
15+
; SECTIONS: DW_TAG_variable
16+
; SECTIONS-NEXT: DW_AT_const_value (7)
17+
; SECTIONS-NEXT: DW_AT_name ("i")
18+
19+
; Test to check that a variable declared within a scope that has basic block
20+
; sections still produces DW_AT_const_value.
21+
; Source to generate the IR below:
22+
23+
; void f1(int *);
24+
; extern bool b;
25+
; int test() {
26+
; // i is const throughout the whole scope and should
27+
; // use DW_AT_const_value. The scope creates basic
28+
; // block sections and should use DW_AT_ranges.
29+
; int j = 10;
30+
; {
31+
; int i = 7;
32+
; f1(&j);
33+
; if (b)
34+
; f1(&j);
35+
; }
36+
; return j;
37+
; }
38+
;
39+
; clang++ -S scoped_section_const.cc -g -O2 -emit-llvm
40+
41+
@b = external local_unnamed_addr global i8, align 1
42+
43+
; Function Attrs: mustprogress uwtable
44+
define dso_local noundef i32 @_Z4testv() local_unnamed_addr #0 !dbg !9 {
45+
%1 = alloca i32, align 4
46+
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1) #4, !dbg !17
47+
call void @llvm.dbg.value(metadata i32 10, metadata !14, metadata !DIExpression()), !dbg !18
48+
store i32 10, ptr %1, align 4, !dbg !19, !tbaa !20
49+
call void @llvm.dbg.value(metadata i32 7, metadata !15, metadata !DIExpression()), !dbg !24
50+
call void @llvm.dbg.value(metadata ptr %1, metadata !14, metadata !DIExpression(DW_OP_deref)), !dbg !18
51+
call void @_Z2f1Pi(ptr noundef nonnull %1), !dbg !25
52+
%2 = load i8, ptr @b, align 1, !dbg !26, !tbaa !28, !range !30, !noundef !31
53+
%3 = icmp eq i8 %2, 0, !dbg !26
54+
br i1 %3, label %5, label %4, !dbg !32
55+
56+
4: ; preds = %0
57+
call void @llvm.dbg.value(metadata ptr %1, metadata !14, metadata !DIExpression(DW_OP_deref)), !dbg !18
58+
call void @_Z2f1Pi(ptr noundef nonnull %1), !dbg !33
59+
br label %5, !dbg !33
60+
61+
5: ; preds = %4, %0
62+
%6 = load i32, ptr %1, align 4, !dbg !34, !tbaa !20
63+
call void @llvm.dbg.value(metadata i32 %6, metadata !14, metadata !DIExpression()), !dbg !18
64+
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1) #4, !dbg !35
65+
ret i32 %6, !dbg !36
66+
}
67+
68+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
69+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
70+
71+
declare !dbg !37 void @_Z2f1Pi(ptr noundef) local_unnamed_addr #2
72+
73+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
74+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
75+
76+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
77+
declare void @llvm.dbg.value(metadata, metadata, metadata) #3
78+
79+
attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
80+
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
81+
attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
82+
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
83+
attributes #4 = { nounwind }
84+
85+
!llvm.dbg.cu = !{!0}
86+
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
87+
!llvm.ident = !{!8}
88+
89+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "Debian clang version 16.0.6 (26)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
90+
!1 = !DIFile(filename: "scoped_section_const.cc", directory: "", checksumkind: CSK_MD5, checksum: "0406492d2e2e38af35d9ea210ba1f24b")
91+
!2 = !{i32 7, !"Dwarf Version", i32 5}
92+
!3 = !{i32 2, !"Debug Info Version", i32 3}
93+
!4 = !{i32 1, !"wchar_size", i32 4}
94+
!5 = !{i32 8, !"PIC Level", i32 2}
95+
!6 = !{i32 7, !"PIE Level", i32 2}
96+
!7 = !{i32 7, !"uwtable", i32 2}
97+
!8 = !{!"Debian clang version 16.0.6 (26)"}
98+
!9 = distinct !DISubprogram(name: "test", linkageName: "_Z4testv", scope: !1, file: !1, line: 3, type: !10, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13)
99+
!10 = !DISubroutineType(types: !11)
100+
!11 = !{!12}
101+
!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
102+
!13 = !{!14, !15}
103+
!14 = !DILocalVariable(name: "j", scope: !9, file: !1, line: 6, type: !12)
104+
!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 8, type: !12)
105+
!16 = distinct !DILexicalBlock(scope: !9, file: !1, line: 7, column: 5)
106+
!17 = !DILocation(line: 6, column: 5, scope: !9)
107+
!18 = !DILocation(line: 0, scope: !9)
108+
!19 = !DILocation(line: 6, column: 9, scope: !9)
109+
!20 = !{!21, !21, i64 0}
110+
!21 = !{!"int", !22, i64 0}
111+
!22 = !{!"omnipotent char", !23, i64 0}
112+
!23 = !{!"Simple C++ TBAA"}
113+
!24 = !DILocation(line: 0, scope: !16)
114+
!25 = !DILocation(line: 9, column: 7, scope: !16)
115+
!26 = !DILocation(line: 10, column: 11, scope: !27)
116+
!27 = distinct !DILexicalBlock(scope: !16, file: !1, line: 10, column: 11)
117+
!28 = !{!29, !29, i64 0}
118+
!29 = !{!"bool", !22, i64 0}
119+
!30 = !{i8 0, i8 2}
120+
!31 = !{}
121+
!32 = !DILocation(line: 10, column: 11, scope: !16)
122+
!33 = !DILocation(line: 11, column: 9, scope: !27)
123+
!34 = !DILocation(line: 13, column: 12, scope: !9)
124+
!35 = !DILocation(line: 14, column: 1, scope: !9)
125+
!36 = !DILocation(line: 13, column: 5, scope: !9)
126+
!37 = !DISubprogram(name: "f1", linkageName: "_Z2f1Pi", scope: !1, file: !1, line: 1, type: !38, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !31)
127+
!38 = !DISubroutineType(types: !39)
128+
!39 = !{null, !40}
129+
!40 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64)

0 commit comments

Comments
 (0)