Skip to content

[AMDGPU] Correctly merge noalias scopes during lowering of LDS data. #131664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/Analysis/ScopedNoAliasAA.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class ScopedNoAliasAAResult : public AAResultBase {
ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
AAQueryInfo &AAQI);

void collectScopedDomains(const MDNode *NoAlias,
SmallPtrSetImpl<const MDNode *> &Domains) const;

private:
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
};
Expand Down
17 changes: 13 additions & 4 deletions llvm/lib/Analysis/ScopedNoAliasAA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,17 +114,26 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
Nodes.insert(MD);
}

/// Collect the set of scoped domains relevant to the noalias scopes.
void ScopedNoAliasAAResult::collectScopedDomains(
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
if (!NoAlias)
return;
Comment on lines +120 to +121
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't include this null check. Either make this a reference, or instead have the instruction as the argument to query the scope metadata

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NoAlias is checked for null at line 131. Changing this here would require changes in multiple other places unrelated to this patch.

assert(Domains.empty() && "Domains should be empty");
for (const MDOperand &MDOp : NoAlias->operands())
if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
Domains.insert(Domain);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this really need to be a set? I would hope the verifier enforces these entries must be unique to begin with

Copy link
Contributor Author

@srpande srpande Apr 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Domain is defined as a set prior to this change. This patch just outlines this code into a different function such that it could be used in multiple places in compiler.

}

bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
const MDNode *NoAlias) const {
if (!Scopes || !NoAlias)
return true;

// Collect the set of scope domains relevant to the noalias scopes.
SmallPtrSet<const MDNode *, 16> Domains;
for (const MDOperand &MDOp : NoAlias->operands())
if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
Domains.insert(Domain);
collectScopedDomains(NoAlias, Domains);

// We alias unless, for some domain, the set of noalias scopes in that domain
// is a superset of the set of alias scopes in that domain.
Expand Down
32 changes: 31 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
Expand Down Expand Up @@ -1441,6 +1442,8 @@ class AMDGPULowerModuleLDS {
if (!MaxDepth || (A == 1 && !AliasScope))
return;

ScopedNoAliasAAResult ScopedNoAlias;

for (User *U : Ptr->users()) {
if (auto *I = dyn_cast<Instruction>(U)) {
if (AliasScope && I->mayReadOrWriteMemory()) {
Expand All @@ -1450,7 +1453,34 @@ class AMDGPULowerModuleLDS {
I->setMetadata(LLVMContext::MD_alias_scope, AS);

MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);

// Scoped aliases can originate from two different domains.
// The first domain would be the LDS domain (created by this pass).
// All entries (LDS vars) in the LDS struct will have the same domain.

// The second domain could be existing scoped aliases that are the
// results of noalias params and subsequent optimizations that
// may alter these sets.

// We need to be careful how we create new alias sets, and
// have right scopes and domains for loads/stores of these new
// LDS variables. We intersect NoAlias set if alias sets belong
// to the same domain. This is the case if we have a memcpy using
// LDS variables. Both src and dst of the memcpy would belong to
// the LDS struct, so they do not alias.
// On the other hand, if one of the domains is LDS and other is
// existing domain prior to LDS, we need to have a union of all
// these aliases set to preserve existing aliasing information.

SmallPtrSet<const MDNode *, 16> ExistingDomains, LDSDomains;
ScopedNoAlias.collectScopedDomains(NA, ExistingDomains);
ScopedNoAlias.collectScopedDomains(NoAlias, LDSDomains);
auto Intersection = set_intersection(ExistingDomains, LDSDomains);
if (Intersection.empty()) {
NA = NA ? MDNode::concatenate(NA, NoAlias) : NoAlias;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code computes existing domains, but then resulting NA computation does not depend on it, it still uses the same NA and does not really use Diff or ExistingDomains. It is just replacing intersection with the concatenation, but not with the pre-existing info. What am I missing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is the set operation on domains. Depending on the result of set operation of domain, we perform set operations on alias set in order to have correct alias sets on loads/store of new elements of LDS structs.

} else {
NA = NA ? MDNode::intersect(NA, NoAlias) : NoAlias;
}
I->setMetadata(LLVMContext::MD_noalias, NA);
}
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ define amdgpu_kernel void @calls_f0() {
define void @f0() {
; CHECK-LABEL: define void @f0()
; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24
; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !24
; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !29
; CHECK-NEXT: ret void
store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1

Expand Down
13 changes: 10 additions & 3 deletions llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrs
; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA1:![0-9]+]], !noalias !6
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 [[I]]
; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1]], !noalias !6
; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !6
; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !11
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), i32 0, i32 [[I]]
; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !6
; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !11
; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]]
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -48,4 +48,11 @@ bb:
; CHECK:!3 = !{!"int", !4, i64 0}
; CHECK:!4 = !{!"omnipotent char", !5, i64 0}
; CHECK:!5 = !{!"Simple C++ TBAA"}
; CHECK:!6 = !{}
; CHECK:!6 = !{!7, !9}
; CHECK:!7 = distinct !{!7, !8}
; CHECK:!8 = distinct !{!8}
; CHECK:!9 = distinct !{!9, !10}
; CHECK:!10 = distinct !{!10}
; CHECK:!11 = !{!12, !13}
; CHECK:!12 = distinct !{!12, !8}
; CHECK:!13 = distinct !{!13, !10}
54 changes: 54 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lower-lds-with-alias-scope.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s

; Test that LDS lowering keeps pre-existing !alias.scope/!noalias metadata
; usable: the loads of @a/@b and the store to @c carry scoped-alias info in
; one shared domain (!4), so the ds_write may be scheduled between the
; ds_read2 pairs in the checked output below.

@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4

define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
; GCN-LABEL: ds_load_stores_aainfo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
; GCN-NEXT: v_mov_b32_e32 v0, 1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshl_b32 s0, s0, 2
; GCN-NEXT: v_mov_b32_e32 v4, s0
; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
; GCN-NEXT: s_endpgm
bb:
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i

; Loads are in scope !7 and noalias with scope !3; the store to @c is in
; the noalias list !2 (scope !3). All scopes share the single domain !4.
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !alias.scope !6, !noalias !5
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !alias.scope !6, !noalias !5

store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2

%val = add i64 %val.a, %val.b
store i64 %val, ptr addrspace(1) %arg, align 4

tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
ret void
}

; !4 is the (single) alias domain; !3 and !7 are scopes within it.
; !2 and !5 are noalias scope lists; !6 is an alias.scope list.
!0 = !{!"omnipotent char", !1, i64 0}
!1 = !{!1}
!2 = !{!3}
!3 = distinct !{!3, !4}
!4 = distinct !{!4}
!5 = !{!3}
!6 = !{!7}
!7 = !{!7, !4}
86 changes: 86 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lower-lds-with-noalias.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s

; Test merging of pre-existing !noalias metadata with the scoped-alias
; metadata the LDS lowering pass creates: the checked IR below shows the
; resulting !noalias lists combining scopes from the pre-existing domain
; with scopes from the new LDS domain (e.g. [[META7]] holds [[META8]] from
; one domain and [[META10]]/[[META11]] from another).

@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4

define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
; GCN-LABEL: ds_load_stores_aainfo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
; GCN-NEXT: v_mov_b32_e32 v0, 1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshl_b32 s0, s0, 2
; GCN-NEXT: v_mov_b32_e32 v4, s0
; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
; GCN-NEXT: s_endpgm
; CHECK-LABEL: define amdgpu_kernel void @ds_load_stores_aainfo(
; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[BB:.*:]]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 [[I]]
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 1), i32 0, i32 [[I]]
; CHECK-NEXT: [[VAL_A:%.*]] = load i64, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1:![0-9]+]], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: [[VAL_B:%.*]] = load i64, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]]
; CHECK-NEXT: store i64 1, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa [[TBAA1]], !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
; CHECK-NEXT: [[VAL:%.*]] = add i64 [[VAL_A]], [[VAL_B]]
; CHECK-NEXT: store i64 [[VAL]], ptr addrspace(1) [[ARG]], align 4
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
; CHECK-NEXT: ret void
;
bb:
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i

; Input carries only !noalias (no !alias.scope): loads of @a/@b and the
; store to @c are all noalias with scope !3, which lives in domain !4.
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5

store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2

%val = add i64 %val.a, %val.b
store i64 %val, ptr addrspace(1) %arg, align 4

tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
ret void
}

; Pre-existing metadata: !4 is a domain, !3 a scope in it; !2 and !5 are
; noalias scope lists referring to !3.
!0 = !{!"omnipotent char", !1, i64 0}
!1 = !{!1}
!2 = !{!3}
!3 = distinct !{!3, !4}
!4 = distinct !{!4}
!5 = !{!3}
;.
; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0, i64 0}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
; CHECK: [[META3]] = distinct !{[[META3]]}
; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
; CHECK: [[META6]] = distinct !{[[META6]]}
; CHECK: [[META7]] = !{[[META8:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]}
; CHECK: [[META9]] = distinct !{[[META9]]}
; CHECK: [[META10]] = distinct !{[[META10]], [[META6]]}
; CHECK: [[META11]] = distinct !{[[META11]], [[META6]]}
; CHECK: [[META12]] = !{[[META10]]}
; CHECK: [[META13]] = !{[[META8]], [[META5]], [[META11]]}
; CHECK: [[META14]] = !{[[META11]]}
; CHECK: [[META15]] = !{[[META8]], [[META5]], [[META10]]}
;.
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ define void @f0() {

define amdgpu_kernel void @k_f0() {
; MODULE-LABEL: @k_f0(
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META1]]
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META10:![0-9]+]], !noalias [[META1]]
; MODULE-NEXT: call void @f0()
; MODULE-NEXT: ret void
;
Expand All @@ -83,9 +83,9 @@ define amdgpu_kernel void @k_f0() {
@both.lds = addrspace(3) global i32 poison
define void @f_both() {
; MODULE-LABEL: @f_both(
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11:![0-9]+]]
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11]]
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @f_both(
Expand Down Expand Up @@ -116,9 +116,9 @@ define void @f_both() {
define amdgpu_kernel void @k0_both() {
; MODULE-LABEL: @k0_both(
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
; MODULE-NEXT: call void @f_both()
; MODULE-NEXT: ret void
;
Expand Down
Loading