Skip to content

Commit 9cf2465

Browse files
authored
[AMDGPU] Fix spurious NoAlias results (#122309)
After a30e50f, AMDGPUAAResult is called in more situations where BasicAA isn't sure. This exposed regressions in which NoAlias was incorrectly returned for two identical pointers. The fix is to return NoAlias only when the two underlying objects differ and the second one is an identified object; otherwise MayAlias is returned.
1 parent 58af82b commit 9cf2465

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,14 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
8080
} else if (const Argument *Arg = dyn_cast<Argument>(ObjA)) {
8181
const Function *F = Arg->getParent();
8282
switch (F->getCallingConv()) {
83-
case CallingConv::AMDGPU_KERNEL:
83+
case CallingConv::AMDGPU_KERNEL: {
8484
// In the kernel function, kernel arguments won't alias to (local)
8585
// variables in shared or private address space.
86-
return AliasResult::NoAlias;
86+
const auto *ObjB =
87+
getUnderlyingObject(B.Ptr->stripPointerCastsForAliasAnalysis());
88+
return ObjA != ObjB && isIdentifiedObject(ObjB) ? AliasResult::NoAlias
89+
: AliasResult::MayAlias;
90+
}
8791
default:
8892
// TODO: In the regular function, if that local variable in the
8993
// location B is not captured, that argument pointer won't alias to it

llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,3 +318,20 @@ define void @test_9_9(ptr addrspace(9) %p, ptr addrspace(9) %p1) {
318318
load i8, ptr addrspace(9) %p1
319319
ret void
320320
}
321+
322+
; CHECK-LABEL: Function: test_kernel_arg_local_ptr
323+
; CHECK: MayAlias: i32 addrspace(3)* %arg, i32 addrspace(3)* %arg1
324+
; CHECK: MayAlias: i32 addrspace(3)* %arg, i32* %arg2
325+
; CHECK: MayAlias: i32 addrspace(3)* %arg1, i32* %arg2
326+
; Regression test: every pointer loaded from below is derived from the single
; kernel argument %arg, so no pair of them may be reported as NoAlias.
;   %arg1 - %arg advanced by +1 and then by -1 through two GEPs.
;   %arg2 - %arg advanced by +4, addrspacecast to a plain `ptr`, then by -4.
define amdgpu_kernel void @test_kernel_arg_local_ptr(ptr addrspace(3) %arg) {
327+
entry:
328+
%load1 = load i32, ptr addrspace(3) %arg, align 4
329+
%arg.plus.1 = getelementptr inbounds nuw i8, ptr addrspace(3) %arg, i64 1
330+
%arg1 = getelementptr inbounds nuw i8, ptr addrspace(3) %arg.plus.1, i64 -1
331+
%load2 = load i32, ptr addrspace(3) %arg1, align 4
332+
%arg.plus.4 = getelementptr inbounds nuw i8, ptr addrspace(3) %arg, i64 4
333+
%acast = addrspacecast ptr addrspace(3) %arg.plus.4 to ptr
334+
%arg2 = getelementptr inbounds i8, ptr %acast, i64 -4
335+
%load3 = load i32, ptr %arg2, align 4
336+
ret void
337+
}

0 commit comments

Comments
 (0)