Commit e9a2aa6

[amdgpu][lds] Use a consistent order of fields in generated structs
Avoids spurious and confusing test failures when the implementation changes.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D136598
1 parent a7a0b82 commit e9a2aa6

2 files changed, 27 insertions(+), 15 deletions(-)

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

16 additions & 4 deletions

@@ -390,10 +390,22 @@ class AMDGPULowerModuleLDS : public ModulePass {
 
     SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
     LayoutFields.reserve(LDSVarsToTransform.size());
-    for (GlobalVariable *GV : LDSVarsToTransform) {
-      OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()),
-                                   AMDGPU::getAlign(DL, GV));
-      LayoutFields.emplace_back(F);
+    {
+      // The order of fields in this struct depends on the order of
+      // variables in the argument which varies when changing how they
+      // are identified, leading to spurious test breakage.
+      std::vector<GlobalVariable *> Sorted(LDSVarsToTransform.begin(),
+                                           LDSVarsToTransform.end());
+      llvm::sort(Sorted.begin(), Sorted.end(),
+                 [](const GlobalVariable *lhs, const GlobalVariable *rhs) {
+                   return lhs->getName() < rhs->getName();
+                 });
+      for (GlobalVariable *GV : Sorted) {
+        OptimizedStructLayoutField F(GV,
+                                     DL.getTypeAllocSize(GV->getValueType()),
+                                     AMDGPU::getAlign(DL, GV));
+        LayoutFields.emplace_back(F);
+      }
     }
 
     performOptimizedStructLayout(LayoutFields);
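
The change wraps the layout input in a name-sorted copy, so the generated struct no longer depends on how LDSVarsToTransform happened to be populated. Below is a minimal standalone sketch of that pattern in plain C++ rather than LLVM's APIs; Field, assignOffsets and buildDeterministicLayout are hypothetical names used only for illustration, and the offset assignment is a deliberate simplification of performOptimizedStructLayout.

#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>

// A field to be packed into a generated struct.
struct Field {
  std::string Name;    // stable key used for ordering
  uint64_t Size;
  uint64_t Align;
  uint64_t Offset = 0; // filled in by the layout step
};

// Stand-in for the layout step: assign offsets in presentation order,
// honouring each field's alignment.
static void assignOffsets(std::vector<Field> &Fields) {
  uint64_t Offset = 0;
  for (Field &F : Fields) {
    Offset = (Offset + F.Align - 1) / F.Align * F.Align; // round up to Align
    F.Offset = Offset;
    Offset += F.Size;
  }
}

// Sort by name before laying out, so the result does not depend on the
// order in which the caller collected the fields.
std::vector<Field> buildDeterministicLayout(std::vector<Field> Fields) {
  std::sort(Fields.begin(), Fields.end(),
            [](const Field &L, const Field &R) { return L.Name < R.Name; });
  assignOffsets(Fields);
  return Fields;
}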

llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll

11 additions & 11 deletions

@@ -3,18 +3,18 @@
 
 ; CHECK: %llvm.amdgcn.module.lds.t = type { float, float }
 
-@func = addrspace(3) global float undef, align 4
+@a_func = addrspace(3) global float undef, align 4
 
 ; CHECK: %llvm.amdgcn.kernel.timestwo.lds.t = type { float }
 
 @kern = addrspace(3) global float undef, align 4
 
-; @func is only used from a non-kernel function so is rewritten
-; CHECK-NOT: @func
-; @both is used from a non-kernel function so is rewritten
-; CHECK-NOT: @both
-; sorted both < func, so @both at null and @func at 4
-@both = addrspace(3) global float undef, align 4
+; @a_func is only used from a non-kernel function so is rewritten
+; CHECK-NOT: @a_func
+; @b_both is used from a non-kernel function so is rewritten
+; CHECK-NOT: @b_both
+; sorted both < func, so @b_both at null and @a_func at 4
+@b_both = addrspace(3) global float undef, align 4
 
 ; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 4
 ; CHECK: @llvm.amdgcn.kernel.timestwo.lds = internal addrspace(3) global %llvm.amdgcn.kernel.timestwo.lds.t undef, align 4

@@ -32,7 +32,7 @@
 ; CHECK: ret i32 %8
 define i32 @get_func() local_unnamed_addr #0 {
 entry:
-  %0 = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @func to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @func to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+  %0 = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @a_func to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @a_func to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
   ret i32 %0
 }
 
@@ -49,7 +49,7 @@ entry:
 ; CHECK: ret void
 define void @set_func(i32 %x) local_unnamed_addr #1 {
 entry:
-  store i32 %x, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @both to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @both to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+  store i32 %x, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
   ret void
 }
 
@@ -78,9 +78,9 @@ entry:
 ; CHECK: store i32 %mul, i32* %16, align 4
 ; CHECK: ret void
 define amdgpu_kernel void @timestwo() {
-  %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @both to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @kern to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+  %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @kern to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
   %mul = mul i32 %ld, 2
-  store i32 %mul, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @kern to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @both to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+  store i32 %mul, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @kern to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @b_both to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
   ret void
 }
 
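As a quick check of the sketch shown after the C++ diff above (it reuses the hypothetical Field and buildDeterministicLayout from that block, so it is not standalone), feeding the same fields in either collection order produces an identical layout, which is the determinism the commit is after:

#include <cassert>
#include <cstddef>

int main() {
  // Two 4-byte, 4-aligned fields, collected in different orders.
  std::vector<Field> First  = {{"b_both", 4, 4}, {"a_func", 4, 4}};
  std::vector<Field> Second = {{"a_func", 4, 4}, {"b_both", 4, 4}};

  std::vector<Field> A = buildDeterministicLayout(First);
  std::vector<Field> B = buildDeterministicLayout(Second);

  // Both collection orders yield the same field order and offsets.
  assert(A.size() == B.size());
  for (std::size_t I = 0; I < A.size(); ++I) {
    assert(A[I].Name == B[I].Name);
    assert(A[I].Offset == B[I].Offset);
  }
  return 0;
}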