Skip to content

Commit ccb3a8f

Browse files
authored
[AMDGPU][LowerModuleLDS] Refactor partially lowered module detection (#85793)
Refactor the logic that checks if a module contains mixed absolute/non-lowered LDS GVs. The check now happens later, when the "worklists" are formed. This is because in some cases (OpenMP) we can have non-lowered GVs in a lowered module, and this is expected: those GVs are simply unused and are removed from the list before `getUsesOfLDSByFunction` returns. Doing the check later ensures that if a mixed module is spotted, then it's a _real_ mixed module that needs rejection, not a module containing an intentionally ignored GV.
1 parent 23de386 commit ccb3a8f

File tree

3 files changed

+52
-16
lines changed

3 files changed

+52
-16
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -340,26 +340,11 @@ class AMDGPULowerModuleLDS {
340340

341341
// Get uses from the current function, excluding uses by called functions
342342
// Two output variables to avoid walking the globals list twice
343-
std::optional<bool> HasAbsoluteGVs;
344343
for (auto &GV : M.globals()) {
345344
if (!AMDGPU::isLDSVariableToLower(GV)) {
346345
continue;
347346
}
348347

349-
// Check if the module is consistent: either all GVs are absolute (happens
350-
// when we run the pass more than once), or none are.
351-
const bool IsAbsolute = GV.isAbsoluteSymbolRef();
352-
if (HasAbsoluteGVs.has_value()) {
353-
if (*HasAbsoluteGVs != IsAbsolute) {
354-
report_fatal_error(
355-
"Module cannot mix absolute and non-absolute LDS GVs");
356-
}
357-
} else
358-
HasAbsoluteGVs = IsAbsolute;
359-
360-
if (IsAbsolute)
361-
continue;
362-
363348
for (User *V : GV.users()) {
364349
if (auto *I = dyn_cast<Instruction>(V)) {
365350
Function *F = I->getFunction();
@@ -469,6 +454,31 @@ class AMDGPULowerModuleLDS {
469454
}
470455
}
471456

457+
// Verify that we fall into one of 2 cases:
458+
// - All variables are absolute: this is a re-run of the pass
459+
// so we don't have anything to do.
460+
// - No variables are absolute.
461+
std::optional<bool> HasAbsoluteGVs;
462+
for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
463+
for (auto &[Fn, GVs] : Map) {
464+
for (auto *GV : GVs) {
465+
bool IsAbsolute = GV->isAbsoluteSymbolRef();
466+
if (HasAbsoluteGVs.has_value()) {
467+
if (*HasAbsoluteGVs != IsAbsolute) {
468+
report_fatal_error(
469+
"Module cannot mix absolute and non-absolute LDS GVs");
470+
}
471+
} else
472+
HasAbsoluteGVs = IsAbsolute;
473+
}
474+
}
475+
}
476+
477+
// If we only had absolute GVs, we have nothing to do, return an empty
478+
// result.
479+
if (HasAbsoluteGVs && *HasAbsoluteGVs)
480+
return {FunctionVariableMap(), FunctionVariableMap()};
481+
472482
return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
473483
}
474484

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
3+
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
4+
5+
; This looks like a partially lowered module, but the non-lowered GV isn't used by any kernels.
6+
; In such cases, LowerModuleLDS is free to leave it in and ignore it, and we want to make sure
7+
; LowerModuleLDS doesn't crash if it re-runs on such modules.
8+
@notLowered = addrspace(3) global i32 poison
9+
@lowered = addrspace(3) global i32 poison, !absolute_symbol !0
10+
11+
@llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @notLowered to ptr)], section "llvm.metadata"
12+
13+
define amdgpu_kernel void @kern(i32 %val0) {
14+
; CHECK-LABEL: define amdgpu_kernel void @kern(
15+
; CHECK-SAME: i32 [[VAL0:%.*]]) {
16+
; CHECK-NEXT: [[VAL1:%.*]] = add i32 [[VAL0]], 4
17+
; CHECK-NEXT: store i32 [[VAL1]], ptr addrspace(3) @lowered, align 4
18+
; CHECK-NEXT: ret void
19+
;
20+
%val1 = add i32 %val0, 4
21+
store i32 %val1, ptr addrspace(3) @lowered
22+
ret void
23+
}
24+
25+
26+
!0 = !{i32 0, i32 1}

llvm/test/CodeGen/AMDGPU/lds-reject-mixed-absolute-addresses.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
define amdgpu_kernel void @kern() {
99
%val0 = load i32, ptr addrspace(3) @var1
1010
%val1 = add i32 %val0, 4
11-
store i32 %val1, ptr addrspace(3) @var1
11+
store i32 %val1, ptr addrspace(3) @var2
1212
ret void
1313
}
1414

0 commit comments

Comments
 (0)