Skip to content

Commit 3327d88

Browse files
committed
[AMDGPU] Correctly merge noalias scopes during lowering of LDS data.
Currently, if noalias metadata is already present on loads and stores, the lower module LDS pass generates a more conservative aliasing set. This inhibits scheduling intrinsics that would otherwise have produced a better-pipelined instruction sequence. The fix is not to always intersect the already existing noalias metadata with the noalias metadata created for lowering of LDS, but to intersect only if the noalias scopes are from the same domain; otherwise, concatenate the existing noalias sets with the LDS noalias.
1 parent bbaf743 commit 3327d88

File tree

8 files changed

+186
-16
lines changed

8 files changed

+186
-16
lines changed

llvm/include/llvm/Analysis/ScopedNoAliasAA.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ class ScopedNoAliasAAResult : public AAResultBase {
4343
ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
4444
AAQueryInfo &AAQI);
4545

46+
void collectScopedDomains(const MDNode *NoAlias,
47+
SmallPtrSetImpl<const MDNode*> &Domains) const;
4648
private:
4749
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
4850
};

llvm/lib/Analysis/ScopedNoAliasAA.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,17 +114,26 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
114114
Nodes.insert(MD);
115115
}
116116

117+
// Collect the set of scope domains relevant to the noalias scopes.
118+
void ScopedNoAliasAAResult::collectScopedDomains(const MDNode *NoAlias,
119+
SmallPtrSetImpl<const MDNode*> &Domains) const {
120+
if (!NoAlias)
121+
return;
122+
assert(Domains.empty() && "Domains should be empty");
123+
for (const MDOperand &MDOp : NoAlias->operands())
124+
if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
125+
if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
126+
Domains.insert(Domain);
127+
}
128+
117129
bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
118130
const MDNode *NoAlias) const {
119131
if (!Scopes || !NoAlias)
120132
return true;
121133

122134
// Collect the set of scope domains relevant to the noalias scopes.
123135
SmallPtrSet<const MDNode *, 16> Domains;
124-
for (const MDOperand &MDOp : NoAlias->operands())
125-
if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
126-
if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
127-
Domains.insert(Domain);
136+
collectScopedDomains(NoAlias, Domains);
128137

129138
// We alias unless, for some domain, the set of noalias scopes in that domain
130139
// is a superset of the set of alias scopes in that domain.

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@
185185
#include "llvm/ADT/DenseSet.h"
186186
#include "llvm/ADT/STLExtras.h"
187187
#include "llvm/ADT/SetOperations.h"
188+
#include "llvm/Analysis/ScopedNoAliasAA.h"
188189
#include "llvm/Analysis/CallGraph.h"
189190
#include "llvm/CodeGen/TargetPassConfig.h"
190191
#include "llvm/IR/Constants.h"
@@ -244,6 +245,7 @@ template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
244245

245246
class AMDGPULowerModuleLDS {
246247
const AMDGPUTargetMachine &TM;
248+
friend class ScopedNoAliasAAResult;
247249

248250
static void
249251
removeLocalVarsFromUsedLists(Module &M,
@@ -1424,14 +1426,12 @@ class AMDGPULowerModuleLDS {
14241426

14251427
Align A =
14261428
commonAlignment(Replacement.SGV->getAlign().valueOrOne(), Offset);
1427-
14281429
if (I)
14291430
NoAliasList[I - 1] = AliasScopes[I - 1];
14301431
MDNode *NoAlias =
14311432
NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
14321433
MDNode *AliasScope =
14331434
AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});
1434-
14351435
refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
14361436
}
14371437
}
@@ -1442,6 +1442,8 @@ class AMDGPULowerModuleLDS {
14421442
if (!MaxDepth || (A == 1 && !AliasScope))
14431443
return;
14441444

1445+
ScopedNoAliasAAResult ScopedNoAlias;
1446+
14451447
for (User *U : Ptr->users()) {
14461448
if (auto *I = dyn_cast<Instruction>(U)) {
14471449
if (AliasScope && I->mayReadOrWriteMemory()) {
@@ -1451,7 +1453,17 @@ class AMDGPULowerModuleLDS {
14511453
I->setMetadata(LLVMContext::MD_alias_scope, AS);
14521454

14531455
MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
1454-
NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
1456+
// If domain of NoAlias (domain of LDS structure) is different
1457+
// than existing NA, we need to preserve existing !NoAlias
1458+
SmallPtrSet<const MDNode *, 16> ExistingDomains, LDSDomains;
1459+
ScopedNoAlias.collectScopedDomains(NA, ExistingDomains);
1460+
ScopedNoAlias.collectScopedDomains(NoAlias, LDSDomains);
1461+
auto Diff = set_difference(ExistingDomains, LDSDomains);
1462+
if (Diff.empty()) {
1463+
NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
1464+
} else {
1465+
NA = (NA ? MDNode::concatenate(NA, NoAlias) : NoAlias);
1466+
}
14551467
I->setMetadata(LLVMContext::MD_noalias, NA);
14561468
}
14571469
}

llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ define amdgpu_kernel void @calls_f0() {
8484
define void @f0() {
8585
; CHECK-LABEL: define void @f0()
8686
; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24
87-
; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !24
87+
; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !29
8888
; CHECK-NEXT: ret void
8989
store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
9090

llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrs
1212
; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA1:![0-9]+]], !noalias !6
1313
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 [[I]]
1414
; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1]], !noalias !6
15-
; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !6
15+
; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !11
1616
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), i32 0, i32 [[I]]
17-
; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !6
17+
; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !11
1818
; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]]
1919
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4
2020
; CHECK-NEXT: ret void
@@ -48,4 +48,11 @@ bb:
4848
; CHECK:!3 = !{!"int", !4, i64 0}
4949
; CHECK:!4 = !{!"omnipotent char", !5, i64 0}
5050
; CHECK:!5 = !{!"Simple C++ TBAA"}
51-
; CHECK:!6 = !{}
51+
; CHECK:!6 = !{!7, !9}
52+
; CHECK:!7 = distinct !{!7, !8}
53+
; CHECK:!8 = distinct !{!8}
54+
; CHECK:!9 = distinct !{!9, !10}
55+
; CHECK:!10 = distinct !{!10}
56+
; CHECK:!11 = !{!12, !13}
57+
; CHECK:!12 = distinct !{!12, !8}
58+
; CHECK:!13 = distinct !{!13, !10}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s
3+
4+
@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
5+
@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
6+
@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
7+
8+
define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
9+
; GCN-LABEL: ds_load_stores_aainfo:
10+
; GCN: ; %bb.0: ; %bb
11+
; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
12+
; GCN-NEXT: v_mov_b32_e32 v0, 1
13+
; GCN-NEXT: v_mov_b32_e32 v1, 0
14+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
15+
; GCN-NEXT: s_lshl_b32 s0, s0, 2
16+
; GCN-NEXT: v_mov_b32_e32 v4, s0
17+
; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
18+
; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
19+
; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
20+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
21+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
22+
; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
23+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
24+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
25+
; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
26+
; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
27+
; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
28+
; GCN-NEXT: s_endpgm
29+
bb:
30+
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
31+
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
32+
33+
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !alias.scope !6, !noalias !5
34+
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !alias.scope !6, !noalias !5
35+
36+
store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2
37+
38+
%val = add i64 %val.a, %val.b
39+
store i64 %val, ptr addrspace(1) %arg, align 4
40+
41+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
42+
tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
43+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
44+
ret void
45+
}
46+
47+
!0 = !{!"omnipotent char", !1, i64 0}
48+
!1 = !{!1}
49+
!2 = !{!3}
50+
!3 = distinct !{!3, !4}
51+
!4 = distinct !{!4}
52+
!5 = !{!3}
53+
!6 = !{!7}
54+
!7 = !{!7, !4}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s
3+
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
4+
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
5+
6+
@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
7+
@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
8+
@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
9+
10+
define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
11+
; GCN-LABEL: ds_load_stores_aainfo:
12+
; GCN: ; %bb.0: ; %bb
13+
; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
14+
; GCN-NEXT: v_mov_b32_e32 v0, 1
15+
; GCN-NEXT: v_mov_b32_e32 v1, 0
16+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
17+
; GCN-NEXT: s_lshl_b32 s0, s0, 2
18+
; GCN-NEXT: v_mov_b32_e32 v4, s0
19+
; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
20+
; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
21+
; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
22+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
23+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
24+
; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
25+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
26+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
27+
; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
28+
; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
29+
; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
30+
; GCN-NEXT: s_endpgm
31+
; CHECK-LABEL: define amdgpu_kernel void @ds_load_stores_aainfo(
32+
; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
33+
; CHECK-NEXT: [[BB:.*:]]
34+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 [[I]]
35+
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 1), i32 0, i32 [[I]]
36+
; CHECK-NEXT: [[VAL_A:%.*]] = load i64, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1:![0-9]+]], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
37+
; CHECK-NEXT: [[VAL_B:%.*]] = load i64, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]]
38+
; CHECK-NEXT: store i64 1, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa [[TBAA1]], !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
39+
; CHECK-NEXT: [[VAL:%.*]] = add i64 [[VAL_A]], [[VAL_B]]
40+
; CHECK-NEXT: store i64 [[VAL]], ptr addrspace(1) [[ARG]], align 4
41+
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
42+
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
43+
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
44+
; CHECK-NEXT: ret void
45+
;
46+
bb:
47+
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
48+
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
49+
50+
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
51+
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5
52+
53+
store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2
54+
55+
%val = add i64 %val.a, %val.b
56+
store i64 %val, ptr addrspace(1) %arg, align 4
57+
58+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
59+
tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
60+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
61+
ret void
62+
}
63+
64+
!0 = !{!"omnipotent char", !1, i64 0}
65+
!1 = !{!1}
66+
!2 = !{!3}
67+
!3 = distinct !{!3, !4}
68+
!4 = distinct !{!4}
69+
!5 = !{!3}
70+
;.
71+
; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0, i64 0}
72+
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
73+
; CHECK: [[META3]] = distinct !{[[META3]]}
74+
; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
75+
; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
76+
; CHECK: [[META6]] = distinct !{[[META6]]}
77+
; CHECK: [[META7]] = !{[[META8:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
78+
; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]}
79+
; CHECK: [[META9]] = distinct !{[[META9]]}
80+
; CHECK: [[META10]] = distinct !{[[META10]], [[META6]]}
81+
; CHECK: [[META11]] = distinct !{[[META11]], [[META6]]}
82+
; CHECK: [[META12]] = !{[[META10]]}
83+
; CHECK: [[META13]] = !{[[META8]], [[META5]], [[META11]]}
84+
; CHECK: [[META14]] = !{[[META11]]}
85+
; CHECK: [[META15]] = !{[[META8]], [[META5]], [[META10]]}
86+
;.

llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ define void @f0() {
6060

6161
define amdgpu_kernel void @k_f0() {
6262
; MODULE-LABEL: @k_f0(
63-
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META1]]
63+
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META10:![0-9]+]], !noalias [[META1]]
6464
; MODULE-NEXT: call void @f0()
6565
; MODULE-NEXT: ret void
6666
;
@@ -83,9 +83,9 @@ define amdgpu_kernel void @k_f0() {
8383
@both.lds = addrspace(3) global i32 poison
8484
define void @f_both() {
8585
; MODULE-LABEL: @f_both(
86-
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
86+
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11:![0-9]+]]
8787
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
88-
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
88+
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11]]
8989
; MODULE-NEXT: ret void
9090
;
9191
; TABLE-LABEL: @f_both(
@@ -116,9 +116,9 @@ define void @f_both() {
116116
define amdgpu_kernel void @k0_both() {
117117
; MODULE-LABEL: @k0_both(
118118
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
119-
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
119+
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
120120
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
121-
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
121+
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
122122
; MODULE-NEXT: call void @f_both()
123123
; MODULE-NEXT: ret void
124124
;

0 commit comments

Comments
 (0)