Skip to content

Commit 319858e

Browse files
SC llvm teamSC llvm team
authored and committed
Merged main:0e42df4031e8 into amd-gfx:cf1a77e69a41
Local branch amd-gfx cf1a77e Merged main:96ef623a7525 into amd-gfx:cf4b971d071e Remote branch main 0e42df4 [AMDGPU][NFC] DWARF vector composite location description operations (llvm#71623)
2 parents cf1a77e + 0e42df4 commit 319858e

File tree

5 files changed

+96
-19
lines changed

5 files changed

+96
-19
lines changed

clang-tools-extra/clangd/index/CanonicalIncludes.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,8 @@ const std::pair<llvm::StringRef, llvm::StringRef> IncludeMappings[] = {
668668
{"bits/syslog-path.h", "<sys/syslog.h>"},
669669
{"bits/termios.h", "<termios.h>"},
670670
{"bits/types.h", "<sys/types.h>"},
671+
{"bits/types/siginfo_t.h", "<sys/siginfo.h>"},
672+
{"bits/types/struct_itimerspec.h", "<sys/time.h>"},
671673
{"bits/uio.h", "<sys/uio.h>"},
672674
{"bits/ustat.h", "<sys/ustat.h>"},
673675
{"bits/utmp.h", "<utmp.h>"},

llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -448,11 +448,34 @@ See ``DW_AT_LLVM_vector_size`` in :ref:`amdgpu-dwarf-base-type-entries`.
448448

449449
AMDGPU optimized code may spill vector registers to non-global address space
450450
memory, and this spilling may be done only for SIMT lanes that are active on
451-
entry to the subprogram.
452-
453-
To support this, a composite location description that can be created as a
454-
masked select is required. In addition, an operation that creates a composite
451+
entry to the subprogram. To support this, the CFI rule for the partially spilled
452+
register needs to use an expression that uses the EXEC register as a bit mask to
453+
select between the register (for inactive lanes) and the stack spill location
454+
(for active lanes that are spilled). This needs to evaluate to a location
455+
description, and not a value, as a debugger needs to change the value if the
456+
user assigns to the variable.
457+
458+
Another usage is to create an expression that evaluates to provide a vector of
459+
logical PCs for active and inactive lanes in a SIMT execution model. Again, the
460+
EXEC register is used to select between active and inactive PC values. In order
461+
to represent a vector of PC values, a way to create a composite location
462+
description that is a vector of a single location is used.
463+
464+
It may be possible to use existing DWARF to incrementally build the composite
465+
location description, possibly using the DWARF operations for control flow to
466+
create a loop. However, for the AMDGPU that would require 64 loop iterations.
467+
A concern is that the resulting DWARF would have a significant size and would be
468+
reasonably common as it is needed for every vector register that is spilled in a
469+
function. AMDGPU can have up to 512 vector registers. Another concern is the
470+
time taken to evaluate such non-trivial expressions repeatedly.
471+
472+
To avoid these issues, a composite location description that can be created as a
473+
masked select is proposed. In addition, an operation that creates a composite
455474
location description that is a vector on another location description is needed.
475+
These operations generate the composite location description using a single
476+
DWARF operation that combines all lanes of the vector in one step. The DWARF
477+
expression is more compact, and can be evaluated by a consumer far more
478+
efficiently.
456479

457480
An example that uses these operations is referenced in the
458481
:ref:`amdgpu-dwarf-further-examples` appendix.

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 480604
19+
#define LLVM_MAIN_REVISION 480607
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,16 @@ static RegisterScheduler
204204
defaultListDAGScheduler("default", "Best scheduler for the target",
205205
createDefaultScheduler);
206206

207+
static bool dontUseFastISelFor(const Function &Fn) {
208+
// Don't enable FastISel for functions with swiftasync Arguments.
209+
// Debug info on those is reliant on good Argument lowering, and FastISel is
210+
// not capable of lowering the entire function. Mixing the two selectors tends
211+
// to result in poor lowering of Arguments.
212+
return any_of(Fn.args(), [](const Argument &Arg) {
213+
return Arg.hasAttribute(Attribute::AttrKind::SwiftAsync);
214+
});
215+
}
216+
207217
namespace llvm {
208218

209219
//===--------------------------------------------------------------------===//
@@ -219,21 +229,23 @@ namespace llvm {
219229
: IS(ISel) {
220230
SavedOptLevel = IS.OptLevel;
221231
SavedFastISel = IS.TM.Options.EnableFastISel;
222-
if (NewOptLevel == SavedOptLevel)
223-
return;
224-
IS.OptLevel = NewOptLevel;
225-
IS.TM.setOptLevel(NewOptLevel);
226-
LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
227-
<< IS.MF->getFunction().getName() << "\n");
228-
LLVM_DEBUG(dbgs() << "\tBefore: -O" << static_cast<int>(SavedOptLevel) << " ; After: -O"
229-
<< static_cast<int>(NewOptLevel) << "\n");
230-
if (NewOptLevel == CodeGenOptLevel::None) {
231-
IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
232-
LLVM_DEBUG(
233-
dbgs() << "\tFastISel is "
234-
<< (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
235-
<< "\n");
232+
if (NewOptLevel != SavedOptLevel) {
233+
IS.OptLevel = NewOptLevel;
234+
IS.TM.setOptLevel(NewOptLevel);
235+
LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
236+
<< IS.MF->getFunction().getName() << "\n");
237+
LLVM_DEBUG(dbgs() << "\tBefore: -O" << static_cast<int>(SavedOptLevel)
238+
<< " ; After: -O" << static_cast<int>(NewOptLevel)
239+
<< "\n");
240+
if (NewOptLevel == CodeGenOptLevel::None)
241+
IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
236242
}
243+
if (dontUseFastISelFor(IS.MF->getFunction()))
244+
IS.TM.setFastISel(false);
245+
LLVM_DEBUG(
246+
dbgs() << "\tFastISel is "
247+
<< (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
248+
<< "\n");
237249
}
238250

239251
~OptLevelChanger() {
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; RUN: llc %s --fast-isel=true --stop-after=finalize-isel -o %t \
2+
; RUN: -experimental-debug-variable-locations=false --global-isel=false
3+
; RUN: FileCheck %s < %t
4+
; RUN: FileCheck %s --check-prefix=INTRINSICS < %t
5+
6+
7+
source_filename = "ir_x86.ll"
8+
target triple = "x86_64-*"
9+
10+
define swifttailcc void @foo(ptr swiftasync %0) !dbg !43 {
11+
call void asm sideeffect "", "r"(ptr %0), !dbg !62
12+
; FastISEL doesn't preserve %0 here. Check that this function is lowered with SelectionDAG.
13+
call void @llvm.dbg.value(metadata ptr %0, metadata !54, metadata !DIExpression(DW_OP_plus_uconst, 4242)), !dbg !62
14+
ret void, !dbg !62
15+
}
16+
17+
; CHECK-NOT: DBG_VALUE $noreg
18+
; INTRINSICS: ![[VAR:[0-9]*]] = !DILocalVariable(name: "msg",
19+
; INTRINSICS: DBG_VALUE {{.*}}, ![[VAR]], !DIExpression(DW_OP_plus_uconst, 4242)
20+
21+
22+
declare void @llvm.dbg.value(metadata, metadata, metadata)
23+
24+
!llvm.module.flags = !{!6, !7, !8, !9, !10}
25+
!llvm.dbg.cu = !{!16}
26+
27+
!6 = !{i32 7, !"Dwarf Version", i32 4}
28+
!7 = !{i32 2, !"Debug Info Version", i32 3}
29+
!8 = !{i32 1, !"wchar_size", i32 4}
30+
!9 = !{i32 8, !"PIC Level", i32 2}
31+
!10 = !{i32 7, !"uwtable", i32 2}
32+
!16 = distinct !DICompileUnit(language: DW_LANG_Swift, file: !17, producer: "blah", emissionKind: FullDebug)
33+
!17 = !DIFile(filename: "blah", directory: "blah")
34+
!43 = distinct !DISubprogram(name: "blah", linkageName: "blah", file: !17, line: 87, type: !44, scopeLine: 87, unit: !16, retainedNodes: !48)
35+
!44 = !DISubroutineType(types: !45)
36+
!45 = !{!46}
37+
!46 = !DICompositeType(tag: DW_TAG_structure_type, name: "blah")
38+
!48 = !{!54}
39+
!54 = !DILocalVariable(name: "msg", arg: 1, scope: !43, file: !17, line: 87, type: !46)
40+
!62 = !DILocation(line: 87, column: 30, scope: !43)

0 commit comments

Comments
 (0)