Skip to content

[AMDGPU][LowerModuleLDS] Refactor partially lowered module detection #85793

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 25 additions & 15 deletions llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,26 +340,11 @@ class AMDGPULowerModuleLDS {

// Get uses from the current function, excluding uses by called functions
// Two output variables to avoid walking the globals list twice
std::optional<bool> HasAbsoluteGVs;
for (auto &GV : M.globals()) {
if (!AMDGPU::isLDSVariableToLower(GV)) {
continue;
}

// Check if the module is consistent: either all GVs are absolute (happens
// when we run the pass more than once), or none are.
const bool IsAbsolute = GV.isAbsoluteSymbolRef();
if (HasAbsoluteGVs.has_value()) {
if (*HasAbsoluteGVs != IsAbsolute) {
report_fatal_error(
"Module cannot mix absolute and non-absolute LDS GVs");
}
} else
HasAbsoluteGVs = IsAbsolute;

if (IsAbsolute)
continue;

for (User *V : GV.users()) {
if (auto *I = dyn_cast<Instruction>(V)) {
Function *F = I->getFunction();
Expand Down Expand Up @@ -469,6 +454,31 @@ class AMDGPULowerModuleLDS {
}
}

// Verify that we fall into one of 2 cases:
// - All variables are absolute: this is a re-run of the pass
// so we don't have anything to do.
// - No variables are absolute.
std::optional<bool> HasAbsoluteGVs;
for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
for (auto &[Fn, GVs] : Map) {
for (auto *GV : GVs) {
bool IsAbsolute = GV->isAbsoluteSymbolRef();
if (HasAbsoluteGVs.has_value()) {
if (*HasAbsoluteGVs != IsAbsolute) {
report_fatal_error(
"Module cannot mix absolute and non-absolute LDS GVs");
}
} else
HasAbsoluteGVs = IsAbsolute;
}
}
}

// If we only had absolute GVs, we have nothing to do, return an empty
// result.
if (HasAbsoluteGVs && *HasAbsoluteGVs)
return {FunctionVariableMap(), FunctionVariableMap()};

return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
}

Expand Down
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lds-mixed-absolute-addresses-unused.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s

; This looks like a partially lowered module, but the non-lowered GV isn't used by any kernels.
; In such cases, LowerModuleLDS is free to leave it in and ignore it, and we want to make sure
; LowerModuleLDS doesn't crash if it re-runs on such modules.
@notLowered = addrspace(3) global i32 poison
@lowered = addrspace(3) global i32 poison, !absolute_symbol !0

@llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @notLowered to ptr)], section "llvm.metadata"

define amdgpu_kernel void @kern(i32 %val0) {
; CHECK-LABEL: define amdgpu_kernel void @kern(
; CHECK-SAME: i32 [[VAL0:%.*]]) {
; CHECK-NEXT: [[VAL1:%.*]] = add i32 [[VAL0]], 4
; CHECK-NEXT: store i32 [[VAL1]], ptr addrspace(3) @lowered, align 4
; CHECK-NEXT: ret void
;
%val1 = add i32 %val0, 4
store i32 %val1, ptr addrspace(3) @lowered
ret void
}


!0 = !{i32 0, i32 1}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
define amdgpu_kernel void @kern() {
%val0 = load i32, ptr addrspace(3) @var1
%val1 = add i32 %val0, 4
store i32 %val1, ptr addrspace(3) @var1
store i32 %val1, ptr addrspace(3) @var2
ret void
}

Expand Down