Skip to content

Commit 7c7704c

Browse files
authored
[AMDGPU] Allow any linkage for dynlds (#84742)
Solves SWDEV-449592
1 parent 95ffa8a commit 7c7704c

File tree

2 files changed

+40
-7
lines changed

2 files changed

+40
-7
lines changed

llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,12 @@ Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
3232
}
3333

3434
bool isDynamicLDS(const GlobalVariable &GV) {
35-
// external zero size addrspace(3) without initializer implies cuda/hip extern
36-
// __shared__ the semantics for such a variable appears to be that all extern
37-
// __shared__ variables alias one another. This hits different handling.
35+
// external zero size addrspace(3) without initializer is dynlds.
3836
const Module *M = GV.getParent();
3937
const DataLayout &DL = M->getDataLayout();
40-
if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
38+
if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
4139
return false;
42-
}
43-
uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
44-
return GV.hasExternalLinkage() && AllocSize == 0;
40+
return DL.getTypeAllocSize(GV.getValueType()) == 0;
4541
}
4642

4743
bool isLDSVariableToLower(const GlobalVariable &GV) {
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
3+
4+
; This is an extension and should be rejected by the front-end in most cases.
5+
; If it goes through, lower it as dynlds.
6+
7+
@Var0 = linkonce_odr hidden local_unnamed_addr addrspace(3) global [0 x float] poison
8+
9+
define void @fn(float %val, i32 %idx) {
10+
; CHECK-LABEL: define void @fn(
11+
; CHECK-SAME: float [[VAL:%.*]], i32 [[IDX:%.*]]) {
12+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
13+
; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds [1 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
14+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[VAR0]], align 4
15+
; CHECK-NEXT: [[VAR01:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
16+
; CHECK-NEXT: [[PTR:%.*]] = getelementptr i32, ptr addrspace(3) [[VAR01]], i32 [[IDX]]
17+
; CHECK-NEXT: store float [[VAL]], ptr addrspace(3) [[PTR]], align 4
18+
; CHECK-NEXT: ret void
19+
;
20+
%ptr = getelementptr i32, ptr addrspace(3) @Var0, i32 %idx
21+
store float %val, ptr addrspace(3) %ptr
22+
ret void
23+
}
24+
25+
define amdgpu_kernel void @kernelA(float %val, i32 %idx) {
26+
; CHECK-LABEL: define amdgpu_kernel void @kernelA(
27+
; CHECK-SAME: float [[VAL:%.*]], i32 [[IDX:%.*]]) !llvm.amdgcn.lds.kernel.id [[META1:![0-9]+]] {
28+
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernelA.dynlds) ]
29+
; CHECK-NEXT: tail call void @fn(float [[VAL]], i32 [[IDX]])
30+
; CHECK-NEXT: ret void
31+
;
32+
tail call void @fn(float %val, i32 %idx)
33+
ret void
34+
}
35+
;.
36+
; CHECK: [[META1]] = !{i32 0}
37+
;.

0 commit comments

Comments
 (0)